.\" disable justification (adjust text to left margin only)
.ad l
.SH "THIS MANUAL IS FOR V6.2XX (2007)"
-Recent versions (v6.8xx; 2010 Nov.) have more features than those described below.
+Recent versions (v7.1xx; 2013 Jan.) have more features than those described below.
See also the tips page at
http://mafft.cbrc.jp/alignment/software/tips0.html
.SH "NAME"
--- /dev/null
+#!/usr/bin/perl
+
+#####################################################################
+# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
+#
+# Ver. Date Changelog
+#####################################################################
+# 1.0 07.26.13 Initial release
+# 2.0 09.03.13 Added extensive warnings and error messages
+# 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs
+# 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output
+# 3.2 12.08.14 Removed 5-char restriction for own structure files
+#
+#####################################################################
+
+use strict;
+use Getopt::Long;
+use File::Path qw(make_path remove_tree);
+use LWP::Simple;
+use LWP::UserAgent;
+
+# to prevent error 'Header line too long (limit is 8192)' [v3.1]
+use LWP::Protocol::http;
+push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
+
+
+
+# Base URL of the MAFFTash REST service; the job is POSTed to this address and
+# the status/result requests further down are built by appending to it.
+my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft";
+
+# Values filled in from the command line by GetOptions (undef when not given).
+my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE );
+
+GetOptions
+(
+ 'd=s' => \$WORKDIR, # directory holding the user's own .pdb files
+ 'p=s' => \$PDBLIST, # file listing PDB ids
+ 'o=s' => \$OWNLIST, # file listing ids of own structures
+ 'h=s' => \$HAT3FILE, # where to save the hat3 output
+ 'i=s' => \$INSTRFILE, # where to save the instr output
+);
+
+print STDERR "[MAFFTash-premafft]\n";
+
+# set temp directory
+# The name embeds the process id ($$); cleanup() removes the directory again.
+my $TMP = "/tmp/mapremafft$$";
+make_path($TMP) unless -d $TMP;
+
+
+
+######
+# validation
+# help() prints the usage text and then exits through bail(), so a failed
+# check never falls through to the code below.
+&help("Required parameter : atleast one of either '-p' or '-o'") unless ( defined $PDBLIST || defined $OWNLIST);
+&help("Required parameter : '-d'") if defined $OWNLIST && ! defined $WORKDIR;
+
+# Default output names are relative paths, i.e. files in the current directory.
+$HAT3FILE = "hat3" unless defined $HAT3FILE;
+$INSTRFILE = "instr" unless defined $INSTRFILE;
+# Drop one trailing slash so that "$WORKDIR/<id>.pdb" does not contain "//".
+chop $WORKDIR if defined $WORKDIR && $WORKDIR =~ m/\/$/g;
+
+
+######
+# prepare inputs
+print STDERR "Preparing inputs for service request...\n";
+
+# Form fields for the multipart POST. A flat key/value list (not a hash) is
+# used so that repeated keys, one per uploaded file, are all kept in order.
+my @files = ();
+push(@files, "strweight" => "0.5");
+push(@files, "premafft" => "1");
+
+
+# pdb entries
+# Convert the user's list (one 5-character id per line) into the
+# ">PDBID\n<id>\n" records that are uploaded as the "inputfile" field.
+if ( defined $PDBLIST )
+{
+    print STDERR "PDB List defined!\n";
+    &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;
+    my $listfile = "$TMP/pdblist.inp";
+
+    # Three-argument open with lexical handles: the user-supplied name is
+    # always treated as a plain path, never as an open mode or a command.
+    open(my $inpf, '<', $PDBLIST) or &bail("Error: Cannot open file $PDBLIST for reading!");
+    open(my $outf, '>', $listfile) or &bail("Error: Cannot open temporary file $listfile for writing!");
+
+    while ( my $line = <$inpf> )
+    {
+        chomp $line;
+        # Lists saved with CRLF line ends would otherwise never match below
+        # and every entry would be dropped without any message.
+        $line =~ s/\r$//;
+
+        if ( $line =~ /^(\w{5})$/ )
+        {
+            print $outf ">PDBID\n$1\n";
+        }
+    }
+
+    # Buffered write errors only surface at close time.
+    close($outf) or &bail("Error: Cannot open temporary file $listfile for writing!");
+    close $inpf;
+
+    push(@files, "inputfile" => ["$listfile"]);
+}
+
+
+
+# upload own structures
+# Ids of the user's own structures that were queued for upload; used after the
+# run to warn about any that are missing from the instr output.
+my %ownids = ();
+
+if ( defined $OWNLIST )
+{
+    print STDERR "OWN List defined!\n";
+    &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;
+
+    # Three-argument open: the user-supplied name is only ever a path.
+    open(my $owninpf, '<', $OWNLIST) or &bail("Error: Cannot open file $OWNLIST for reading!");
+
+    while ( my $line = <$owninpf> )
+    {
+        chomp $line;
+        # Tolerate CRLF line ends; with the CR left in place the pattern
+        # below never matches and the whole list is silently ignored.
+        $line =~ s/\r$//;
+
+        # One id per line, no embedded whitespace; other lines are skipped.
+        next unless $line =~ /^(\S+)$/;
+        my $ownid = $1;
+
+        # Each id must have a matching "<id>.pdb" file in the -d directory.
+        my $fileref = "$WORKDIR/$ownid.pdb";
+
+        unless ( -e $fileref )
+        {
+            close $owninpf;
+            &bail("Error: File $fileref does not exists!");
+        }
+
+        # The array-ref value makes LWP upload the file's contents.
+        push(@files, "inputownfile[]" => ["$fileref"]);
+        $ownids{$ownid} = 1;
+    }
+
+    close $owninpf;
+}
+
+
+
+######
+# start rest service
+print STDERR "Sending service request...\n";
+
+my $browser = LWP::UserAgent->new;
+$browser->timeout(0);
+
+
+# post: running a mafftash job
+my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' );
+&bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success);
+
+
+# get response from post request
+my ($status, $mafftashid) = &parseResponse($postResponse->content);
+
+# parseResponse() yields an empty id when the reply is not "<id>:<status>".
+# Without an id every later request would go to "$BASEURL/" and the wait
+# loop could never finish, so stop here instead.
+&bail("Error: Unexpected response from server!") if $mafftashid eq "";
+
+
+# Retry limit and base delay (seconds) shared by the polling/download loop.
+my $MAXTRIES = 3;
+my $STIMER = 4;
+my $longtimer = 0;
+
+print STDERR "Request sent! Waiting for response...[$mafftashid]\n";
+
+
+# wait for results until it becomes available
+# Outer loop: poll the job status; once it reports "done", download the
+# checksum list and then every file named in it, each with up to $MAXTRIES
+# attempts. Any unrecoverable problem leaves through bail().
+while(1)
+{
+ # Back-off between polls: with $STIMER = 4 the delays are 4, 8, 12, 16
+ # seconds and then start again at 4.
+ $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER;
+ sleep $longtimer;
+
+
+ # get: get results for mafftash job
+ my $getResponse = $browser->get("$BASEURL/$mafftashid");
+
+ if ( $getResponse->is_success )
+ {
+
+ # get response from get request
+ # NOTE(review): a reply that does not parse resets $mafftashid to "",
+ # so the following polls go to "$BASEURL/" - confirm this is acceptable.
+ ($status, $mafftashid) = &parseResponse($getResponse->content);
+ next unless ( $status eq "done" );
+
+
+ # if job is finished and ready
+ print STDERR "Results found!\n";
+ my $csfile = "$TMP/checksum.tar.gz";
+ my $try1 = 1;
+
+
+ # Retry loop for the checksum list.
+ while(1)
+ {
+ print STDERR "Fetching Results... [Trial $try1]\n";
+
+ # Accept the download only if the HTTP status is a success and a
+ # non-empty file was actually written.
+ if ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile )
+ {
+ # get response from get request
+ # $checklist maps file name => expected checksum.
+ my $checklist = &extractchecksum($csfile);
+ &bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 );
+
+
+ foreach my $id ( keys %$checklist )
+ {
+ my $checkfile = "$TMP/$id";
+ my $checkid = $checklist->{$id};
+ my $try2 = 1;
+
+ # Retry loop for one result file.
+ while(1)
+ {
+ # Always start from a clean slate so a partial file from an
+ # earlier attempt cannot be mistaken for a good one.
+ unlink $checkfile if -e $checkfile;
+
+ if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
+ {
+ my $hashid = &getchecksum($checkfile);
+ #print STDERR "[hashid]$hashid [checkid]$checkid\n";
+
+ # An empty $hashid means no checksum could be computed
+ # locally; in that case the file is accepted unverified.
+ if ($hashid ne "" && $hashid ne $checkid )
+ {
+ unlink $checkfile if -e $checkfile;
+ &bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ else
+ {
+ # file is good: leave the per-file retry loop
+ last;
+ }
+ }
+ else
+ {
+ &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ }
+ }
+
+ # all files fetched: leave the checksum-list retry loop
+ last;
+ }
+ else
+ {
+ &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
+ $try1++;
+ sleep $STIMER;
+ }
+ }
+
+ # everything downloaded: leave the polling loop
+ last;
+
+ }
+ else
+ {
+ # A failed status request is fatal; it is not retried.
+ &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content)));
+ }
+
+}
+
+
+# make sure outputs were generated
+# decompress
+print STDERR "Assembling final results...\n";
+
+# Concatenate every downloaded file whose name starts with "archive.tar.gz"
+# and unpack the result into $TMP, then move the two expected outputs to
+# their final names. The return values of backticks() are not checked here;
+# success is judged by the -e tests below.
+&backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");
+&backticks("mv -f $TMP/instr $INSTRFILE") if -e "$TMP/instr";
+&backticks("mv -f $TMP/hat3 $HAT3FILE") if -e "$TMP/hat3";
+
+# sometimes no hat3 file is generated [v3.1]
+#&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;
+&bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;
+
+
+# warn if some ownids were omitted
+if ( scalar keys(%ownids) > 0 )
+{
+ # Ids found in the instr file, taken from header lines of the form
+ # ">NUMBER_ID".
+ my %instrids = ();
+
+ open(INSTRF,"<$INSTRFILE") or &bail("Error: Cannot open file $INSTRFILE for reading!");
+
+ while(<INSTRF>)
+ {
+ chomp;
+
+ if ( /^>\d+_(\S+)$/ )
+ {
+ $instrids{$1} = 1;
+ }
+ }
+
+ close INSTRF;
+
+ # Report every uploaded structure that has no header in the instr file.
+ foreach my $id ( keys %ownids )
+ {
+ warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id};
+ }
+
+}
+
+
+
+# Normal exit path: remove the temporary directory.
+&cleanup();
+
+
+
+####################
+####################
+
+
+
+sub parseResponse
+{
+    # Split a one-line server reply of the form "<jobid>:<status>" (neither
+    # part may contain whitespace or ':') into its two fields.
+    #
+    # Returns the list (status, jobid). Both are "" when the reply does not
+    # have that shape.
+    my ($reply) = @_;
+
+    my ($jobid, $state) = $reply =~ /^([^\s:]+):([^\s:]+)$/;
+
+    $state = "" unless defined $state;
+    $jobid = "" unless defined $jobid;
+
+    return ($state, $jobid);
+}
+
+
+sub extractchecksum
+{
+    # Read the checksum list stored inside a gzip-compressed tar archive.
+    #
+    # The archive is streamed through the external tar command and every
+    # line made of exactly two whitespace-separated fields is recorded as
+    # second field => first field.
+    #
+    # Returns a hash reference; it is empty when the pipe cannot be opened
+    # or no line has the expected shape.
+    my ($infile) = @_;
+    my %dataset;
+
+    open(my $csum, "tar -zxf $infile -O|") or return \%dataset;
+
+    while ( defined( my $row = <$csum> ) )
+    {
+        chomp $row;
+        next unless $row =~ /^(\S+)\s+(\S+)$/;
+        $dataset{$2} = $1;
+    }
+
+    close $csum;
+
+    return \%dataset;
+}
+
+
+sub parseError
+{
+    # Extract a human-readable message from an error reply.
+    #
+    # The service reports errors as  "error":"<text>"  and in that case
+    # <text> is returned. Replies of any other shape are returned unchanged,
+    # so the caller's "[code] message" line is never left with an empty
+    # message.
+    my $response = shift;
+
+    #"error":"Invalid number of inputs found."
+    my $errorstr = ( $response =~ /\"error\"\s*:\s*\"([^\"]+)\"/ ) ? $1 : $response;
+    return $errorstr;
+}
+
+
+sub getchecksum
+{
+    # Compute the MD5 digest of a file with the system's md5 tool.
+    #
+    # Looks for /usr/bin/md5sum first and /sbin/md5 second. Returns the
+    # digest (the first whitespace-delimited field of the tool's output),
+    # or "" when neither tool is installed or the tool produced no output;
+    # callers treat "" as "could not verify".
+    my $infile = shift;
+
+    # md5 binary check
+    my @md5cmd = ();
+
+    if ( -x "/usr/bin/md5sum" )
+    {
+        @md5cmd = ("/usr/bin/md5sum");
+    }
+    elsif ( -x "/sbin/md5" )
+    {
+        @md5cmd = ("/sbin/md5", "-q");
+    }
+
+    return "" unless @md5cmd;
+
+    # List-form pipe open: the path is handed to the tool as one argument
+    # and never passes through a shell. Part of the path is a file name
+    # received from the server, and it may also contain spaces.
+    open(my $md5exe, '-|', @md5cmd, $infile) or return "";
+
+    my $checksum = "";
+
+    while ( my $line = <$md5exe> )
+    {
+        # Only the leading field is the digest. Whatever follows (the file
+        # name, possibly with spaces) must not decide whether we match,
+        # otherwise verification would be skipped for such paths.
+        if ( $line =~ /^(\S+)/ )
+        {
+            $checksum = $1;
+            last;
+        }
+    }
+
+    close $md5exe;
+
+    return $checksum;
+}
+
+
+sub backticks
+{
+    # Run a command line through the shell and discard whatever it prints
+    # on standard output.
+    #
+    # Returns 0 only when the command could not be started at all
+    # ($? == -1) and 1 in every other case; the command's own exit status
+    # is not examined.
+    my ($command) = @_;
+
+    my $discarded = `$command`;
+
+    return 0 if $? == -1;
+    return 1;
+}
+
+
+sub bail
+{
+    # Abort the script: print the optional message to STDERR, remove the
+    # temporary directory through cleanup(), and exit with status 1.
+    my ($message) = @_;
+
+    if ( defined $message )
+    {
+        print STDERR "$message\n";
+    }
+
+    &cleanup();
+    exit(1);
+}
+
+
+sub cleanup
+{
+    # Delete the temporary working directory $TMP together with the files
+    # directly inside it. Does nothing when $TMP is empty or is not a
+    # directory.
+    return if $TMP eq "";
+    return unless -d $TMP;
+
+    opendir(my $maindir, $TMP);
+    my @entries = readdir($maindir);
+    closedir($maindir);
+
+    # First remove the plain entries one by one ...
+    for my $entry (@entries)
+    {
+        my $path = "$TMP/$entry";
+        unlink $path if -e $path;
+    }
+
+    # ... then the directory itself and anything still left in it.
+    remove_tree($TMP);
+}
+
+
+# Print the usage text to standard output, then terminate through bail(),
+# which prints $str (the reason help was shown) to STDERR and exits.
+sub help
+{
+ my $str = shift;
+
+ # Single-quoted heredoc: the text is printed exactly as written, with no
+ # variable interpolation.
+ print <<'HELPME';
+
+USAGE
+ ./mafftash_premafft.pl -p [FILE]
+ ./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]
+ ./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]
+
+
+PARAMETERS
+ -p [FILE]
+ FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format
+
+ -o [FILE] -d [DIRECTORY]
+ FILE contains a list of IDs from your own structure/pdb files (one entry per line)
+ for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY
+
+ -h [HATFILE]
+ save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory
+
+ -i [INSTRFILE]
+ save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory
+
+HELPME
+
+ &bail($str);
+}
+
+
+
--- /dev/null
+#!/usr/bin/perl
+
+####################################################################################
+# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
+#
+# Ver. Date Changelog
+####################################################################################
+# 1.0 11.01.13 Initial release
+#
+# **Skipped version 2 to standardise version numbers to seekquencer.pl script**
+#
+# 3.0 04.24.14 Added split option -mod <mafftash-split> for output
+# Uses seekquencer_v3 backend
+#
+# 4.0 05.12.14 Added new options: -run <thread|normal> -trd <count> -noin
+# Sets -seqa fast in seekquencer.pl
+# Uses seekquencer_v4 backend
+#
+# 4.1 05.19.14 Added a check on running REST requests before proceeding
+# to avoid server load problems
+#
+# 4.2 05.27.14 Seq limit processing done in seekquencer.pl script
+# to avoid server load problems
+#
+# 4.3 07.22.14 Added new option: -seqd <uniref100|uniref90|uniref70|uniprot>
+# Blast limit changed from factor of 10 to -blim option
+# Timing on sleep changed; added srand() for making seed
+# Moved the job limit processing to server side
+#
+# 4.4 08.05.14 Modified to work in multiple OS
+#
+#
+####################################################################################
+
+use strict;
+use Getopt::Long;
+use File::Path qw(make_path remove_tree);
+use Cwd;
+use LWP::Simple;
+use LWP::UserAgent;
+
+# to prevent error: Header line too long (limit is 8192)
+use LWP::Protocol::http;
+push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
+
+
+
+# Base URL of the Seekquencer REST service; all requests below are built
+# from it.
+my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft";
+# Command-line values without a default (undef when the switch is absent).
+my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG );
+# Defaults for the switches that have one; GetOptions overwrites them.
+my $OUTTYPE = "mafftash";
+
+my $SEQDATABASE = "uniref100";
+my $SEQLIMIT = 100;
+my $SEQBLASTLIMIT = 100;
+
+my $RUNMODE = "normal"; # thread|normal
+my $THREADCOUNT = 3;
+
+
+GetOptions
+(
+ 'inp=s' => \$INPUTFILE,
+ 'idf=s' => \$IDLISTFILE,
+ 'seqf=s' => \$SEQFASTAFILE,
+ 'out=s' => \$OUTPUTFILE,
+ 'str' => \$STRFLAG,
+ 'seq' => \$SEQFLAG,
+ 'seqd=s' => \$SEQDATABASE,
+ 'lim=i' => \$SEQLIMIT,
+ 'blim=i' => \$SEQBLASTLIMIT,
+ 'pre' => \$EVALFLAG,
+ 'noin' => \$NOINFLAG,
+ 'mod=s' => \$OUTTYPE,
+ 'run=s' => \$RUNMODE,
+ 'trd=i' => \$THREADCOUNT,
+
+
+);
+
+# $^O is Perl's name for the operating system; on Windows the download loop
+# skips the checksum comparison.
+my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0;
+print STDERR "[Seekquencer-premafft 4.4 on $^O]\n";
+
+
+# set temp directory
+# Created inside the current directory, with the process id in its name;
+# removed again by cleanup().
+my $CWD = getcwd;
+my $TMP = "$CWD/seekpremafft$$";
+make_path($TMP) unless -d $TMP;
+
+
+
+######
+# validation
+# help() prints the usage text and exits through bail(), so a failed check
+# never continues. Input is either -inp alone, or -idf and/or -seqf.
+help("Required parameter: define input as '-inp' or '-idf' or '-seqf'") if ( !defined $INPUTFILE && !defined $IDLISTFILE && !defined $SEQFASTAFILE );
+help("'-inp' is already defined") if ( defined $INPUTFILE && (defined $IDLISTFILE || defined $SEQFASTAFILE) );
+help("Input file $INPUTFILE does not exist (or filesize is 0)") if ( defined $INPUTFILE && (! -e $INPUTFILE || !-s $INPUTFILE) );
+help("Input file $IDLISTFILE does not exist (or filesize is 0)") if ( defined $IDLISTFILE && (! -e $IDLISTFILE || !-s $IDLISTFILE) );
+help("Input file $SEQFASTAFILE does not exist (or filesize is 0)") if ( defined $SEQFASTAFILE && (! -e $SEQFASTAFILE || !-s $SEQFASTAFILE) );
+help("Required parameter: output file '-out'") unless ( defined $OUTPUTFILE );
+help("Set either '-str' or '-seq' or dont set any at all") if ( defined $STRFLAG && defined $SEQFLAG );
+
+# Enumerated switches must carry one of the values the service understands.
+help("Invalid value for '-seqd <uniref100|uniref90|uniref70|uniprot>'") if ( $SEQDATABASE ne "uniref100" && $SEQDATABASE ne "uniref90" && $SEQDATABASE ne "uniref70" && $SEQDATABASE ne "uniprot");
+help("Invalid value for '-mod <fasta|mafftash|mafftash-split>'") if ( $OUTTYPE ne "fasta" && $OUTTYPE ne "mafftash" && $OUTTYPE ne "mafftash-split" );
+help("Invalid value for '-run <thread|normal>'") if ( $RUNMODE ne "thread" && $RUNMODE ne "normal" );
+# The thread count is only checked when thread mode was requested.
+help("Invalid value for '-trd <count>'; count should be between 1 and 5 (inclusive)") if ( $RUNMODE eq "thread" && ($THREADCOUNT <= 0 || $THREADCOUNT > 5) );
+
+
+######
+# check existing requests
+print STDERR "Checking server status...\n";
+
+# generate seed
+# Seeded with the process id, so the random delays differ between processes.
+srand($$);
+
+# sleep a bit to give time for lsf response
+# (a random whole number of seconds from 1 to 6)
+sleep(int(rand(6))+1);
+
+
+my $browser = LWP::UserAgent->new;
+$browser->timeout(0);
+
+# get: check if you can send a new request this time
+my $jobsResponse = $browser->get("$BASEURL/isAllowed");
+
+if ( $jobsResponse->is_success )
+{
+ # parseJobQueryResponse() returns the number sent by the server, or 100
+ # when the body is not a plain number; any value above 0 lets the run
+ # continue.
+ my $status = parseJobQueryResponse($jobsResponse->content);
+ # The second argument is the exit status requested for this case.
+ bail("Max jobs reached. The server cannot process your request right now; try again later.", 0) unless $status > 0;
+}
+else
+{
+ bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content)));
+}
+
+
+######
+# make a temporary input if lists were provided
+# When no -inp file was given, build one in $TMP: first a ">PDBID" record
+# for every id found in the -idf list, then the -seqf FASTA file copied
+# line by line.
+unless ( defined $INPUTFILE )
+{
+    $INPUTFILE = "$TMP/input.homemade";
+
+    # Three-argument open with lexical handles: user-supplied names are
+    # always plain paths, and no global handle is shared with appendToFile().
+    open(my $inpf, '>', $INPUTFILE) or bail("Error writing to input file.");
+
+    if ( defined $IDLISTFILE )
+    {
+        open(my $idlist, '<', $IDLISTFILE) or bail("Error reading input file.");
+        while ( my $line = <$idlist> )
+        {
+            # The first run of five word characters on a line is the id.
+            if ( $line =~ /(\w{5})/ )
+            {
+                print $inpf ">PDBID\n$1\n";
+            }
+        }
+        close $idlist;
+    }
+
+
+    if ( defined $SEQFASTAFILE )
+    {
+        open(my $fasta, '<', $SEQFASTAFILE) or bail("Error reading input file.");
+        while ( my $line = <$fasta> )
+        {
+            # chomp + "\n" guarantees the last line ends with a newline.
+            chomp $line;
+            print $inpf "$line\n";
+        }
+        close $fasta;
+    }
+
+    # Buffered write errors only surface at close time.
+    close($inpf) or bail("Error writing to input file.");
+}
+
+
+######
+# prepare parameters
+print STDERR "Preparing parameters for service request...\n";
+
+# What to collect: -str alone asks for structures only, -seq alone for
+# sequences only, and with neither switch both are requested.
+my $wantstr = ( defined $STRFLAG || !defined $SEQFLAG ) ? 1 : 0;
+my $wantseq = defined $STRFLAG ? 0 : 1;
+
+# Form fields for the multipart POST, in the order they are sent. The
+# array-ref value of "fileinput" makes LWP upload the file's contents.
+my @parameters = (
+    "fileinput" => ["$INPUTFILE"],
+    "out_type" => $OUTTYPE,
+    "rest_flag" => "1",
+    "cls_flag" => "1",
+);
+
+push(@parameters, "pre_flag" => "1") if defined $EVALFLAG;
+push(@parameters, "noin_flag" => "1") if defined $NOINFLAG;
+
+push(@parameters, "run_mode" => $RUNMODE);
+push(@parameters, "thread_count" => $THREADCOUNT) if $RUNMODE eq "thread";
+
+# structure search settings
+if ( $wantstr )
+{
+    push(@parameters,
+        "str_flag" => "1",
+        "ash_flag" => "1",
+    );
+}
+
+# sequence search settings
+if ( $wantseq )
+{
+    push(@parameters,
+        "seq_flag" => "1",
+        "seq_algorithm" => "fast",
+        "seq_database" => $SEQDATABASE,
+        "seq_blastlimit" => $SEQBLASTLIMIT,
+        "seq_outputlimit" => $SEQLIMIT,
+    );
+}
+
+
+
+######
+# start rest service
+print STDERR "Sending service request...\n";
+
+# post: running a mafftash job
+my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' );
+bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);
+
+
+# get response from post request
+my ($status, $seekid) = parseResponse($postResponse->content);
+
+# parseResponse() yields an empty id when the reply is not "<id>:<status>".
+# Without an id every later request would go to "$BASEURL/" and the wait
+# loop could never finish, so stop here instead.
+bail("Error: Unexpected response from server!") if $seekid eq "";
+
+# Retry limit and base delay (seconds) shared by the polling/download loop.
+my $MAXTRIES = 3;
+my $STIMER = 5;
+my $timer = 0;
+
+print STDERR "Request sent! Waiting for response...[$seekid]\n";
+
+# file name => expected checksum; filled in by the download loop and read
+# again when the final output is assembled
+my $checklist = {};
+
+# wait for results until it becomes available
+# Outer loop: poll the job status; once it reports "done", download the
+# checksum list and then every file named in it, each with up to $MAXTRIES
+# attempts. Any unrecoverable problem leaves through bail().
+while(1)
+{
+ # sleeps for 5+random, 10+random, 15+random, 20+random, 25+random, 30+random ,,, 60+random, 60+random,,,
+ $timer = $timer >= 60 ? 60 : $timer+$STIMER;
+ sleep($timer+int(rand(4)));
+
+ # get: get results for mafftash job
+ my $getResponse = $browser->get("$BASEURL/$seekid");
+
+ if ( $getResponse->is_success )
+ {
+
+ # get response from get request
+ # NOTE(review): a reply that does not parse resets $seekid to "", so the
+ # following polls go to "$BASEURL/" - confirm this is acceptable.
+ ($status, $seekid) = parseResponse($getResponse->content);
+ next unless ( $status eq "done" );
+
+
+ # if job is finished and ready
+ print STDERR "Results found!\n";
+ my $csfile = "$TMP/checksum";
+ my $try1 = 1;
+
+
+ # Retry loop for the checksum list.
+ while(1)
+ {
+ print STDERR "Fetching Results... [Trial $try1]\n";
+
+ # Accept the download only if the HTTP status is a success and a
+ # non-empty file was actually written.
+ if ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile )
+ {
+ # get response from get request
+ $checklist = extractchecksum($csfile);
+ bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 );
+
+
+ foreach my $id ( sort keys %$checklist )
+ {
+ # one-second pause before each file request
+ sleep 1;
+ my $checkfile = "$TMP/$id";
+ my $checkid = $checklist->{$id};
+ my $try2 = 1;
+
+ # Retry loop for one result file.
+ while(1)
+ {
+ # Always start from a clean slate; this also removes a file that
+ # failed its checksum on the previous attempt.
+ unlink $checkfile if -e $checkfile;
+
+ if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
+ {
+ # On Windows the file is accepted without a checksum comparison.
+ last if $ISWINDOWS;
+
+ my $hashid = getchecksum($checkfile);
+ #print STDERR "[hashid]$hashid [checkid]$checkid\n";
+
+ # An empty $hashid means no checksum could be computed locally;
+ # in that case the file is accepted unverified.
+ if ($hashid ne "" && $hashid ne $checkid )
+ {
+ #unlink $checkfile if -e $checkfile;
+ bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ else
+ {
+ # file is good: leave the per-file retry loop
+ last;
+ }
+ }
+ else
+ {
+ bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ }
+ }
+
+ # all files fetched: leave the checksum-list retry loop
+ last;
+ }
+ else
+ {
+ bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
+ $try1++;
+ sleep $STIMER;
+ }
+ }
+
+ # everything downloaded: leave the polling loop
+ last;
+
+ }
+ else
+ {
+ # A failed status request is fatal; it is not retried.
+ bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content)));
+ }
+
+}
+
+
+# make sure outputs were generated
+# decompress
+print STDERR "Assembling final results...\n";
+
+# Append every downloaded file whose name starts with "<seekid>.out" to the
+# user's output file. An optional ".str" or ".seq" after ".out" is carried
+# over as a suffix of the output name; without it the suffix is empty.
+# NOTE(review): $seekid is interpolated into the pattern unescaped, and the
+# target is opened for appending, so an output file left from an earlier run
+# is extended rather than replaced - confirm both are intended.
+foreach my $id ( sort keys %$checklist )
+{
+ if ( $id =~ /^$seekid\.out(\.str|\.seq)?/ )
+ {
+ bail("Error: Output file corrupted!") unless -e "$TMP/$id";
+ appendToFile("$TMP/$id","$OUTPUTFILE".$1);
+ }
+}
+
+# Normal exit path: remove the temporary directory.
+cleanup();
+
+
+
+####################
+####################
+
+
+sub parseResponse
+{
+    # Split a one-line server reply of the form "<seekid>:<status>" (neither
+    # part may contain whitespace or ':') into its two fields.
+    #
+    # Returns the list (status, seekid); both are "" when the reply does not
+    # have that shape.
+    my ($response) = @_;
+
+    return ("", "") unless $response =~ /^([^\s:]+):([^\s:]+)$/;
+
+    return ($2, $1);
+}
+
+
+sub parseJobQueryResponse
+{
+    # Interpret the reply to the "isAllowed" query.
+    #
+    # A body consisting only of digits is returned as that number. Any
+    # other body yields 100.
+    my ($response) = @_;
+
+    return $1 if $response =~ /^(\d+)$/;
+
+    return 100;
+}
+
+
+sub extractchecksum
+{
+    # Read a plain-text checksum list.
+    #
+    # Every line made of exactly two whitespace-separated fields is recorded
+    # as second field => first field. Other lines are ignored.
+    #
+    # Returns a hash reference; it is empty when the file cannot be opened
+    # or holds no usable line.
+    my $infile = shift;
+    my %dataset = ();
+
+    # Three-argument open with a lexical handle: the path (which contains
+    # the current directory name) is never interpreted as an open mode.
+    open(my $csum, '<', $infile) or return \%dataset;
+
+    while ( my $line = <$csum> )
+    {
+        chomp $line;
+        if ( $line =~ /^(\S+)\s+(\S+)$/ )
+        {
+            $dataset{$2} = $1;
+        }
+    }
+
+    close $csum;
+
+    return \%dataset;
+}
+
+
+sub parseError
+{
+    # Extract a human-readable message from an error reply.
+    #
+    # The service reports errors as  "error":"<text>"  and in that case
+    # <text> is returned; a reply of any other shape is returned unchanged.
+    my ($response) = @_;
+
+    if ( $response =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
+    {
+        return $1;
+    }
+
+    return $response;
+}
+
+
+sub getchecksum
+{
+    # Compute the MD5 digest of a file with the system's md5 tool.
+    #
+    # Looks for /usr/bin/md5sum first and /sbin/md5 second. Returns the
+    # digest (the first whitespace-delimited field of the tool's output),
+    # or "" when neither tool is installed or the tool produced no output;
+    # callers treat "" as "could not verify".
+    my $infile = shift;
+
+    # md5 binary check
+    my @md5cmd = ();
+
+    if ( -x "/usr/bin/md5sum" )
+    {
+        @md5cmd = ("/usr/bin/md5sum");
+    }
+    elsif ( -x "/sbin/md5" )
+    {
+        @md5cmd = ("/sbin/md5", "-q");
+    }
+
+    return "" unless @md5cmd;
+
+    # List-form pipe open: the path is handed to the tool as one argument
+    # and never passes through a shell. The path contains the current
+    # directory (which may hold spaces) and a file name sent by the server.
+    open(my $md5exe, '-|', @md5cmd, $infile) or return "";
+
+    my $checksum = "";
+
+    while ( my $line = <$md5exe> )
+    {
+        # Only the leading field is the digest. Whatever follows (the file
+        # name, possibly with spaces) must not decide whether we match,
+        # otherwise verification would be skipped for such paths.
+        if ( $line =~ /^(\S+)/ )
+        {
+            $checksum = $1;
+            last;
+        }
+    }
+
+    close $md5exe;
+
+    return $checksum;
+}
+
+
+sub backticks
+{
+    # Run a command line through the shell and discard whatever it prints
+    # on standard output.
+    #
+    # Returns 0 only when the command could not be started at all
+    # ($? == -1) and 1 in every other case; the command's own exit status
+    # is not examined.
+    my ($command) = @_;
+
+    my $discarded = `$command`;
+
+    return 0 if $? == -1;
+    return 1;
+}
+
+
+sub bail
+{
+    # Stop the script: print the optional message to STDERR, remove the
+    # temporary directory through cleanup(), and exit.
+    #
+    #   $str     message to print (nothing is printed when undef)
+    #   $status  exit status; optional, defaults to 1
+    my $str = shift;
+    my $status = shift;
+
+    #0 for success and 1 for error
+    # A bare "defined" tests $_, not $status, so it could both override an
+    # explicit 0 and leave a missing status undefined. Test $status itself.
+    $status = 1 unless defined $status;
+
+    print STDERR "$str\n" if defined $str;
+
+    cleanup();
+
+    exit($status);
+}
+
+
+sub cleanup
+{
+    # Delete the temporary working directory $TMP together with the files
+    # directly inside it. Does nothing when $TMP is empty or is not a
+    # directory.
+    return if $TMP eq "";
+    return unless -d $TMP;
+
+    opendir(my $maindir, $TMP);
+    my @entries = readdir($maindir);
+    closedir($maindir);
+
+    # First remove the plain entries one by one ...
+    for my $entry (@entries)
+    {
+        my $path = "$TMP/$entry";
+        unlink $path if -e $path;
+    }
+
+    # ... then the directory itself and anything still left in it.
+    remove_tree($TMP);
+}
+
+
+sub appendToFile
+{
+    # Append the whole content of $inpfile to the end of $outfile, creating
+    # $outfile when it does not exist yet. Any failure ends the script
+    # through bail().
+    my $inpfile = shift;
+    my $outfile = shift;
+
+    # Three-argument open with lexical handles: the user-supplied output
+    # name is always a plain path, and the handles cannot clash with
+    # same-named global handles used elsewhere in the script.
+    open(my $inpf, '<', $inpfile) or bail("Server Error: Error in reading file.");
+    open(my $outf, '>>', $outfile) or bail("Server Error: Error in writing to file.");
+
+    while ( my $line = <$inpf> )
+    {
+        print $outf $line;
+    }
+
+    # Buffered write errors (e.g. a full disk) only surface at close time.
+    close($outf) or bail("Server Error: Error in writing to file.");
+    close $inpf;
+}
+
+
+
+# Print the usage text to standard output, then terminate through bail(),
+# which prints $str (the reason help was shown) to STDERR and exits.
+sub help
+{
+ my $str = shift;
+
+ # Single-quoted heredoc: the text is printed exactly as written, with no
+ # variable interpolation.
+ print <<'HELPME';
+
+USAGE
+ ./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]
+ ./seekquencer_premafft.pl -idf <LISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]
+
+
+PARAMETERS
+ -inp <INFILE>
+ INFILE is a FASTA-formatted file
+ PDB entries are written as:
+ >PDBID
+ [5-character pdbid+chain]
+
+ While sequence entries are written as:
+ >[id]
+ [sequence]
+
+ -idf <LISTFILE>
+ IDLISTFILE is a file containing a list of pdbids
+ pdbids should be a 5-character pdbid + chain
+
+ -seqf <SEQFASTA>
+ SEQFASTA is a fasta file
+ entries are written as:
+ >[id]
+ [sequence]
+
+ -out <OUTFILE>
+ Results are writen to a file named OUTFILE
+
+ -str
+ Only structures will be collected by Seekquencer
+ If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
+
+ -seq
+ Only sequences will be collected by Seekquencer
+ If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
+
+
+OPTIONAL PARAMETERS:
+ -seqd <uniref100|uniref90|uniref70|uniprot>
+ Search Database for sequence homologs. Default value: uniref100
+
+ -lim <count>
+ this sets the maximum number of sequence homologs collected. Default value: 100
+
+ -blim <count>
+ this sets the -b and -v value when running blastall. Default value: 100
+
+ -pre
+ When -str is set, this will compare all structures against all using pdp-ash
+ This would ensure that all structures collected are matching
+ All structures that do not match will be removed
+
+ -noin
+ When set, inputs will not be included in the output
+
+ -mod <mafftash|mafftash-split|fasta>
+ Defines the output format
+ mafftash (default) will print a mafftash-formatted fasta file
+ mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq)
+ fasta will print a regular fasta file
+
+ -run <thread|normal>
+ thread will run simultaneous jobs during blast queries (faster but takes more nodes)
+ normal will run sequential blast queries (slower but takes less nodes)
+ Default value: normal
+
+ -trd <count>
+ if -run <thread> is defined, this sets the number of parallel jobs to run. Default value: 3
+
+
+HELPME
+
+ bail($str);
+}
+
#define DEFAULTGOP_N -1530
#define DEFAULTGEP_N 0
-#define DEFAULTOFS_N -123
+#define DEFAULTOFS_N -369
#define DEFAULTPAMN 200
#define DEFAULTRNAGOP_N -1530
--- /dev/null
+#include "mltaln.h"
+#include "dp.h"
+
+#define DEBUG 0
+
+#define WMCHECK 1
+#define ALGZSTRAIGHT 0
+#define ALGZGAP 0
+#define USEGAPLENMTX 0
+#define USEGAPLENHALF 0
+#define FREEFREQUENTLY 1
+
+#define IDATEND 0
+
+
+#define MACHIGAI 0
+#define OUTGAP0TRY 0
+#define XXXXXXX 0
+#define USE_PENALTY_EX 0
+#define FASTMATCHCALC 1
+#define SLOW 0
+
+#define zero 0
+#define one 1
+
+#if USEGAPLENHALF
+#define USEGAPLENHALFORMTX 1
+#endif
+#if USEGAPLENMTX
+#define USEGAPLENHALFORMTX 1
+#endif
+
+
+#if WMCHECK
+static int PFACERROR = 0;
+#endif
+
+
+static TLS double **impmtx = NULL;
+static TLS int impalloclen = 0;
+
+/* Return the entry of the file-level matrix impmtx at row i1, column j1. */
+double imp_match_out_scD( int i1, int j1 )
+{
+	double *row = impmtx[i1];
+
+	return( row[j1] );
+}
+
+/*
+ * Gaplen record.
+ * The idatend member exists only when USEGAPLENHALFORMTX is defined, which
+ * this file does when USEGAPLENHALF or USEGAPLENMTX is non-zero.
+ * NOTE(review): the meaning of the individual fields is not visible in this
+ * part of the file - confirm against the code that fills them in.
+ */
+typedef struct _gaplenvec
+{
+ int relend;
+#if USEGAPLENHALFORMTX
+ int idatend;
+#endif
+ int idatnext;
+ int idatprev;
+ int npat;
+ int len; // slightly wasteful.
+ double freq;
+} Gaplen;
+
+/* Disabled code: everything up to the matching #endif is compiled out. */
+#if 0
+/*
+ * For j = 0 .. lgth2-1, add impmtx[i1][gapmap2[j]] to imp[j].
+ * Both preprocessor branches compute the same sums; the first one walks
+ * pointers instead of indexing.
+ */
+static void imp_match_out_vead_gapmap( double *imp, int i1, int lgth2, int *gapmap2 )
+{
+#if FASTMATCHCALC
+ double *pt = impmtx[i1];
+ int *gapmappt = gapmap2;
+ while( lgth2-- )
+ *imp++ += pt[*gapmappt++];
+#else
+ int j;
+ double *pt = impmtx[i1];
+ for( j=0; j<lgth2; j++ )
+ *imp++ += pt[gapmap2[j]];
+#endif
+}
+
+
+/*
+ * For i = 0 .. lgth1-1, add impmtx[gapmap1[i]][j1] to imp[i]
+ * (the column-wise counterpart of the function above).
+ */
+static void imp_match_out_vead_tate_gapmap( double *imp, int j1, int lgth1, int *gapmap1 )
+{
+#if FASTMATCHCALC
+ int *gapmappt = gapmap1;
+ while( lgth1-- )
+ *imp++ += impmtx[*gapmappt++][j1];
+#else
+ int i;
+ for( i=0; i<lgth1; i++ )
+ *imp++ += impmtx[gapmap1[i]][j1];
+#endif
+}
+#endif
+
+/*
+ * Add row i1 of the file-level matrix impmtx to imp, element by element:
+ * imp[j] += impmtx[i1][j] for the first lgth2 elements.
+ */
+static void imp_match_out_vead( double *imp, int i1, int lgth2 )
+{
+	double *row = impmtx[i1];
+#if FASTMATCHCALC
+	/* pointer walk; runs once per remaining element */
+	for( ; lgth2 != 0; --lgth2, ++imp, ++row )
+		*imp += *row;
+#else
+	int j = 0;
+	while( j < lgth2 )
+	{
+		imp[j] += row[j];
+		j++;
+	}
+#endif
+}
+/*
+ * Add column j1 of the file-level matrix impmtx to imp, element by element:
+ * imp[i] += impmtx[i][j1] for i = 0 .. lgth1-1.
+ */
+static void imp_match_out_vead_tate( double *imp, int j1, int lgth1 )
+{
+	int i = 0;
+
+	while( i < lgth1 )
+	{
+		imp[i] += impmtx[i][j1];
+		i++;
+	}
+}
+
+/*
+ * Thin wrapper: forwards all arguments to foldrna(), inserting the
+ * file-level matrix impmtx between grouprna2 and gapmap1.
+ * NOTE(review): what foldrna() does with impmtx is not visible here.
+ */
+void imp_rnaD( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***grouprna1, RNApair ***grouprna2, int *gapmap1, int *gapmap2, RNApair *pair )
+{
+ foldrna( nseq1, nseq2, seq1, seq2, eff1, eff2, grouprna1, grouprna2, impmtx, gapmap1, gapmap2, pair );
+}
+
+
+/*
+ * Prepare the file-level matrix impmtx and fill it through fillimp().
+ *
+ * Calling with seq1 == NULL releases impmtx and returns without doing
+ * anything else. Otherwise impmtx is (re)allocated as a square matrix of
+ * side MAX(lgth1, lgth2) + 2 whenever it is missing or too small, and all
+ * arguments are then passed on to fillimp() together with impmtx.
+ */
+void imp_match_init_strictD( double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 )
+{
+	if( seq1 == NULL )
+	{
+		if( impmtx ) FreeFloatMtx( impmtx );
+		impmtx = NULL;
+		/* Forget the old size as well: otherwise a later call whose lengths
+		   still fit would skip the allocation below and hand a NULL matrix
+		   to fillimp(). */
+		impalloclen = 0;
+
+		return;
+	}
+
+	/* The explicit NULL test keeps this safe even if impmtx was released
+	   while impalloclen still held a previous size. */
+	if( impmtx == NULL || impalloclen < lgth1 + 2 || impalloclen < lgth2 + 2 )
+	{
+		if( impmtx ) FreeFloatMtx( impmtx );
+		impalloclen = MAX( lgth1, lgth2 ) + 2;
+		impmtx = AllocateFloatMtx( impalloclen, impalloclen );
+	}
+
+	fillimp( impmtx, imp, clus1, clus2, lgth1, lgth2, seq1, seq2, eff1, eff2, eff1_kozo, eff2_kozo, localhom, swaplist, forscore, orinum1, orinum2 );
+}
+
+
+
+
+
+/*
+ * For every column k of group 2 (0 .. lgth2-1) and every masked pair m
+ * (0 .. nmask-1), subtract from match[k] the term
+ *     matrices[mid][c1][c2] * eff1[i] * eff2[j]
+ * where i = mask1[m], j = mask2[m], and c1 / c2 are the amino_n codes of
+ * seq1[i][i1] and seq2[j][k]. Pairs in which either character is '-' or
+ * either code is negative contribute nothing.
+ * The parameters which, n1 and n2 are not used.
+ * The original author marks this routine as slow.
+ */
+static void match_calc_del( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, int n2, char **seq2, double *eff2, int i1, int lgth2, int mid, int nmask, int *mask1, int *mask2 )
+{
+	int col, m;
+
+	for( col=0; col<lgth2; col++ )
+	{
+		for( m=0; m<nmask; m++ )
+		{
+			int s1 = mask1[m];
+			int s2 = mask2[m];
+			char r1 = seq1[s1][i1];
+			char r2 = seq2[s2][col];
+			int code1, code2;
+
+			/* gaps never score */
+			if( r1 == '-' || r2 == '-' ) continue;
+
+			code1 = amino_n[(unsigned char)r1];
+			code2 = amino_n[(unsigned char)r2];
+			if( code1 < 0 || code2 < 0 ) continue;
+
+			match[col] -= matrices[mid][code1][code2] * eff1[s1] * eff2[s2];
+		}
+	}
+	return;
+}
+
+/* Compiled only when SLOW is non-zero (it is 0 in this file). */
+#if SLOW
+/*
+ * For every column k of group 2 (0 .. lgth2-1), set match[k] to the sum over
+ * all sequence pairs (i < n1, j < n2) of
+ *     matrices[mid][c1][c2] * eff1[i] * eff2[j]
+ * where mid is which[j][i] when flip is set and which[i][j] otherwise, and
+ * c1 / c2 are the amino_n codes of seq1[i][i1] and seq2[j][k]. Pairs with a
+ * '-' on either side or a negative code are skipped.
+ * The parameters doublework, intwork and initialize are not used.
+ */
+static void match_calc_slow( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, int n2, char **seq2, double *eff2, int i1, int lgth2, double **doublework, int **intwork, int initialize, int flip )
+{
+// slow!
+ int i, j, k;
+ int c1, c2;
+ int mid;
+// fprintf( stderr, "\nmatch_calc_dynamicmtx... %d", i1 );
+// fprintf( stderr, "\nseq1[0]=%s\n", seq1[0] );
+// fprintf( stderr, "\nseq2[0]=%s\n", seq2[0] );
+// for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+// {
+// if( flip ) reporterr( "in match_calc_slow, which[%d][%d] = %d\n", j, i, which[j][i] );
+// else reporterr( "in match_calc_slow, which[%d][%d] = %d\n", i, j, which[i][j] );
+// }
+ for( k=0; k<lgth2; k++ )
+ {
+ match[k] = 0.0;
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ {
+ if( flip ) mid = which[j][i];
+ else mid = which[i][j];
+// if( k==0 ) fprintf( stderr, "pairoffset[%d][%d] = %f\n", i, j, po );
+ c1 = amino_n[(unsigned char)seq1[i][i1]];
+ c2 = amino_n[(unsigned char)seq2[j][k]];
+ if( seq1[i][i1] == '-' || seq2[j][k] == '-' ) continue;
+ if( c1 < 0 || c2 < 0 ) continue;
+// fprintf( stderr, "c1=%d, c2=%d\n", c1, c2 );
+ // NOTE(review): both branches below are identical, so flip only
+ // affects the lookup of mid above - confirm that is intended.
+ if( flip )
+ match[k] += matrices[mid][c1][c2] * eff1[i] * eff2[j];
+ else
+ match[k] += matrices[mid][c1][c2] * eff1[i] * eff2[j];
+// fprintf( stderr, "match[k] = %f (which=%d)\n", match[k], mid );
+ }
+ }
+// fprintf( stderr, "done\n" );
+ return;
+}
+#endif
+
+/* Store 0.0 into the first l elements of s. */
+static void fillzero( double *s, int l )
+{
+	for( ; l != 0; --l )
+		*s++ = 0.0;
+}
+
+
+/*
+ * Score column i1 of profile 1 against every column of profile 2 using the
+ * matrix scoreingmtx.
+ *
+ * When initialize is non-zero, the non-zero entries of cpmx2 are first
+ * packed, column by column, into doublework (values) and intwork (row
+ * indices, each list ended by -1). Then scarr[l] is computed as the sum over
+ * j of scoreingmtx[j][l] * cpmx1[j][i1], and for each column of profile 2
+ * the packed values are combined with scarr.
+ *
+ * With FASTMATCHCALC non-zero (the setting in this file) the result is ADDED
+ * to match[]; the caller must have initialised match beforehand.
+ * NOTE(review): the #else branch instead overwrites match[j] (it starts from
+ * 0.0) and stores the packed lists transposed ([count][j] rather than
+ * [j][count]) - confirm before ever enabling it.
+ * NOTE(review): the result of calloc() is not checked for NULL.
+ */
+static void match_calc_add( double **scoreingmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
+{
+#if FASTMATCHCALC
+// fprintf( stderr, "\nmatch_calc... %d", i1 );
+ int j, l;
+// double scarr[26];
+ double **cpmxpd = doublework;
+ int **cpmxpdn = intwork;
+ double *matchpt, *cpmxpdpt, **cpmxpdptpt;
+ int *cpmxpdnpt, **cpmxpdnptpt;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
+ if( initialize )
+ {
+ int count = 0;
+ // pack the non-zero entries of each column j of cpmx2
+ for( j=0; j<lgth2; j++ )
+ {
+ count = 0;
+ for( l=0; l<nalphabets; l++ )
+ {
+ if( cpmx2[l][j] )
+ {
+ cpmxpd[j][count] = cpmx2[l][j];
+ cpmxpdn[j][count] = l;
+ count++;
+ }
+ }
+ // -1 marks the end of the list for column j
+ cpmxpdn[j][count] = -1;
+ }
+ }
+
+ {
+ // scarr[l]: score of column i1 of profile 1 against letter l
+ for( l=0; l<nalphabets; l++ )
+ {
+ scarr[l] = 0.0;
+ for( j=0; j<nalphabets; j++ )
+// scarr[l] += n_dis[j][l] * cpmx1[j][i1];
+// scarr[l] += n_dis_consweight_multi[j][l] * cpmx1[j][i1];
+ scarr[l] += scoreingmtx[j][l] * cpmx1[j][i1];
+ }
+ matchpt = match;
+ cpmxpdnptpt = cpmxpdn;
+ cpmxpdptpt = cpmxpd;
+ // one pass per column of profile 2; note that lgth2 is consumed here
+ while( lgth2-- )
+ {
+// *matchpt = 0.0;
+ cpmxpdnpt = *cpmxpdnptpt++;
+ cpmxpdpt = *cpmxpdptpt++;
+ // walk the packed list until the -1 terminator
+ while( *cpmxpdnpt>-1 )
+ *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++;
+ matchpt++;
+ }
+ }
+ free( scarr );
+// fprintf( stderr, "done\n" );
+#else
+ int j, k, l;
+// double scarr[26];
+ double **cpmxpd = doublework;
+ int **cpmxpdn = intwork;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
+// simple
+ if( initialize )
+ {
+ int count = 0;
+ for( j=0; j<lgth2; j++ )
+ {
+ count = 0;
+ for( l=0; l<nalphabets; l++ )
+ {
+ if( cpmx2[l][j] )
+ {
+ cpmxpd[count][j] = cpmx2[l][j];
+ cpmxpdn[count][j] = l;
+ count++;
+ }
+ }
+ cpmxpdn[count][j] = -1;
+ }
+ }
+ for( l=0; l<nalphabets; l++ )
+ {
+ scarr[l] = 0.0;
+ for( k=0; k<nalphabets; k++ )
+// scarr[l] += n_dis[k][l] * cpmx1[k][i1];
+// scarr[l] += n_dis_consweight_multi[k][l] * cpmx1[k][i1];
+ scarr[l] += scoreingmtx[k][l] * cpmx1[k][i1];
+ }
+ for( j=0; j<lgth2; j++ )
+ {
+ match[j] = 0.0;
+ for( k=0; cpmxpdn[k][j]>-1; k++ )
+ match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];
+ }
+ free( scarr );
+#endif
+}
+
+// Writes into match[0..lgth2-1] the score of column i1 of profile 1 against
+// each column of profile 2 (previous contents of match[] are overwritten).
+//
+// n_dynamicmtx : score matrix, read as n_dynamicmtx[a][b] with a,b < nalphabets
+// cpmx1, cpmx2 : profiles, read as [symbol][column]
+// doublework,
+// intwork      : caller-owned work space for a sparse copy of cpmx2 (non-zero
+//                values and their symbol indices per column, terminated by
+//                index -1); laid out [column][slot] when FASTMATCHCALC is
+//                set and [slot][column] otherwise
+// initialize   : non-zero to (re)build the sparse copy from cpmx2
+static void match_calc( double **n_dynamicmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
+{
+	int col, sym, a, nfilled;
+	double *colscore;
+
+	colscore = calloc( nalphabets, sizeof( double ) );
+
+	// Step 1 (optional): compress every column of cpmx2.
+	if( initialize )
+	{
+		for( col=0; col<lgth2; col++ )
+		{
+			nfilled = 0;
+			for( sym=0; sym<nalphabets; sym++ )
+			{
+				if( cpmx2[sym][col] == 0.0 ) continue;
+#if FASTMATCHCALC
+				doublework[col][nfilled] = cpmx2[sym][col];
+				intwork[col][nfilled] = sym;
+#else
+				doublework[nfilled][col] = cpmx2[sym][col];
+				intwork[nfilled][col] = sym;
+#endif
+				nfilled++;
+			}
+#if FASTMATCHCALC
+			intwork[col][nfilled] = -1;
+#else
+			intwork[nfilled][col] = -1;
+#endif
+		}
+	}
+
+	// Step 2: colscore[sym] = score of sym against column i1 of profile 1.
+	for( sym=0; sym<nalphabets; sym++ )
+	{
+		colscore[sym] = 0.0;
+		for( a=0; a<nalphabets; a++ )
+			colscore[sym] += n_dynamicmtx[a][sym] * cpmx1[a][i1];
+	}
+
+	// Step 3: combine with the sparse columns of profile 2.
+#if FASTMATCHCALC
+	for( col=0; col<lgth2; col++ )
+	{
+		const int *symp = intwork[col];
+		const double *valp = doublework[col];
+		match[col] = 0.0;
+		for( ; *symp > -1; symp++, valp++ )
+			match[col] += colscore[*symp] * *valp;
+	}
+#else
+	for( col=0; col<lgth2; col++ )
+	{
+		match[col] = 0.0;
+		for( a=0; intwork[a][col]>-1; a++ )
+			match[col] += colscore[intwork[a][col]] * doublework[a][col];
+	}
+#endif
+
+	free( colscore );
+}
+
+// Traces the path stored in ijp back from cell (lgth1, lgth2), builds one gap
+// table per group ('o' = keep a column, '-' = insert a gap) and applies the
+// tables to every sequence of both groups through gapireru().
+//
+// impwmpt          : out; reset to 0.0, then incremented by
+//                    imp_match_out_scD( iin, jin ) for every visited cell
+//                    that is not on row lgth1 or column lgth2
+// lasthorizontalw,
+// lastverticalw    : scores scanned for their maximum when outgap != 1
+// seq1, seq2       : input groups; lengths are taken from seq1[0] and seq2[0]
+// mseq1, mseq2     : output groups written by gapireru()
+// ijp              : traceback matrix (encoding explained at the main loop)
+// icyc, jcyc       : number of sequences in seq1 / seq2
+// warpis, warpjs,
+// warpbase         : an ijp value >= warpbase is an index (after subtracting
+//                    warpbase) into warpis/warpjs giving the previous cell
+static void Atracking_localhom( double *impwmpt, double *lasthorizontalw, double *lastverticalw,
+ char **seq1, char **seq2,
+ char **mseq1, char **mseq2,
+ int **ijp, int icyc, int jcyc,
+ int *warpis, int *warpjs, int warpbase )
+{
+ int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
+ double wm;
+ char *gaptable1, *gt1bk;
+ char *gaptable2, *gt2bk;
+ lgth1 = strlen( seq1[0] );
+ lgth2 = strlen( seq2[0] );
+ // The tables are filled from the end backwards; lgth1+lgth2 columns plus
+ // the terminator is the room reserved for them.
+ gt1bk = AllocateCharVec( lgth1+lgth2+1 );
+ gt2bk = AllocateCharVec( lgth1+lgth2+1 );
+
+#if 0
+ for( i=0; i<lgth1; i++ )
+ {
+ fprintf( stderr, "lastverticalw[%d] = %f\n", i, lastverticalw[i] );
+ }
+#endif
+
+ if( outgap == 1 )
+ ;
+ else
+ {
+ // Pick the largest score in lastverticalw / lasthorizontalw and store
+ // the corresponding jump in ijp[lgth1][lgth2]. The iin/jin values
+ // assigned here are overwritten before the traceback starts.
+ wm = lastverticalw[0];
+ for( i=0; i<lgth1; i++ )
+ {
+ if( lastverticalw[i] >= wm )
+ {
+ wm = lastverticalw[i];
+ iin = i; jin = lgth2-1;
+ ijp[lgth1][lgth2] = +( lgth1 - i );
+ }
+ }
+ for( j=0; j<lgth2; j++ )
+ {
+ if( lasthorizontalw[j] >= wm )
+ {
+ wm = lasthorizontalw[j];
+ iin = lgth1-1; jin = j;
+ ijp[lgth1][lgth2] = -( lgth2 - j );
+ }
+ }
+ }
+
+ // Border cells: column 0 and row 0 get fixed jump values.
+ for( i=0; i<lgth1+1; i++ )
+ {
+ ijp[i][0] = i + 1;
+ }
+ for( j=0; j<lgth2+1; j++ )
+ {
+ ijp[0][j] = -( j + 1 );
+ }
+
+ // Start at the terminating NUL of each table and grow towards the front.
+ gaptable1 = gt1bk + lgth1+lgth2;
+ *gaptable1 = 0;
+ gaptable2 = gt2bk + lgth1+lgth2;
+ *gaptable2 = 0;
+
+ iin = lgth1; jin = lgth2;
+ limk = lgth1+lgth2 + 1;
+ *impwmpt = 0.0;
+ for( k=0; k<limk; k++ )
+ {
+ // Decode ijp[iin][jin] into the previous cell (ifi, jfi):
+ //   >= warpbase : looked up in warpis/warpjs
+ //   <  0        : one row up, -ijp columns to the left
+ //   >  0        : ijp rows up, one column to the left
+ //   == 0        : diagonal step
+ if( ijp[iin][jin] >= warpbase )
+ {
+ ifi = warpis[ijp[iin][jin]-warpbase];
+ jfi = warpjs[ijp[iin][jin]-warpbase];
+ }
+ else if( ijp[iin][jin] < 0 )
+ {
+ ifi = iin-1; jfi = jin+ijp[iin][jin];
+ }
+ else if( ijp[iin][jin] > 0 )
+ {
+ ifi = iin-ijp[iin][jin]; jfi = jin-1;
+ }
+ else
+ {
+ ifi = iin-1; jfi = jin-1;
+ }
+ if( ifi == -warpbase && jfi == -warpbase )
+ {
+ // Special previous cell (-warpbase, -warpbase): emit the remaining
+ // iin columns of group 1 and jin columns of group 2 as unpaired
+ // columns and stop.
+ l = iin;
+ while( --l >= 0 )
+ {
+ *--gaptable1 = 'o';
+ *--gaptable2 = '-';
+ k++;
+ }
+ l= jin;
+ while( --l >= 0 )
+ {
+ *--gaptable1 = '-';
+ *--gaptable2 = 'o';
+ }
+ break;
+ }
+ else
+ {
+ // A jump of more than one row (column) becomes a run of gaps in
+ // group 2 (group 1).
+ l = iin - ifi;
+ while( --l )
+ {
+ *--gaptable1 = 'o';
+ *--gaptable2 = '-';
+ k++;
+ }
+ l= jin - jfi;
+ while( --l )
+ {
+ *--gaptable1 = '-';
+ *--gaptable2 = 'o';
+ k++;
+ }
+ }
+ if( iin == lgth1 || jin == lgth2 )
+ ;
+ else
+ {
+ *impwmpt += (double)imp_match_out_scD( iin, jin );
+
+// fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] );
+ }
+ if( iin <= 0 || jin <= 0 ) break;
+ // Matched column pair.
+ *--gaptable1 = 'o';
+ *--gaptable2 = 'o';
+ k++;
+ iin = ifi; jin = jfi;
+ }
+
+ // NOTE(review): gapireru() is defined elsewhere; presumably it copies the
+ // source sequence into the output while inserting gaps per the table.
+ for( i=0; i<icyc; i++ ) gapireru( mseq1[i], seq1[i], gaptable1 );
+ for( j=0; j<jcyc; j++ ) gapireru( mseq2[j], seq2[j], gaptable2 );
+
+ free( gt1bk );
+ free( gt2bk );
+}
+
+// Traces the path stored in ijp back from cell (lgth1, lgth2), builds one gap
+// table per group ('o' = keep a column, '-' = insert a gap) and applies the
+// tables to every sequence of both groups through gapireru().
+//
+// lasthorizontalw,
+// lastverticalw    : scores scanned for their maximum when tailgp != 1
+// seq1, seq2       : input groups; lengths are taken from seq1[0] and seq2[0]
+// mseq1, mseq2     : output groups written by gapireru()
+// ijp              : traceback matrix (encoding explained at the main loop)
+// icyc, jcyc       : number of sequences in seq1 / seq2
+// tailgp           : when 1, the end-cell selection below is skipped
+// warpis, warpjs,
+// warpbase         : an ijp value >= warpbase is an index (after subtracting
+//                    warpbase) into warpis/warpjs giving the previous cell
+//
+// Always returns 0.0.
+static double Atracking( double *lasthorizontalw, double *lastverticalw,
+ char **seq1, char **seq2,
+ char **mseq1, char **mseq2,
+ int **ijp, int icyc, int jcyc,
+ int tailgp,
+ int *warpis, int *warpjs, int warpbase )
+{
+ int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
+ double wm;
+ char *gaptable1, *gt1bk;
+ char *gaptable2, *gt2bk;
+ lgth1 = strlen( seq1[0] );
+ lgth2 = strlen( seq2[0] );
+
+ // The tables are filled from the end backwards; lgth1+lgth2 columns plus
+ // the terminator is the room reserved for them.
+ gt1bk = AllocateCharVec( lgth1+lgth2+1 );
+ gt2bk = AllocateCharVec( lgth1+lgth2+1 );
+
+#if 0
+ for( i=0; i<lgth1; i++ )
+ {
+ fprintf( stderr, "lastverticalw[%d] = %f\n", i, lastverticalw[i] );
+ }
+#endif
+
+ if( tailgp == 1 )
+ ;
+ else
+ {
+ // Pick the largest score in lastverticalw / lasthorizontalw and store
+ // the corresponding jump in ijp[lgth1][lgth2]. The iin/jin values
+ // assigned here are overwritten before the traceback starts.
+ wm = lastverticalw[0];
+ for( i=0; i<lgth1; i++ )
+ {
+ if( lastverticalw[i] >= wm )
+ {
+ wm = lastverticalw[i];
+ iin = i; jin = lgth2-1;
+ ijp[lgth1][lgth2] = +( lgth1 - i );
+ }
+ }
+ for( j=0; j<lgth2; j++ )
+ {
+ if( lasthorizontalw[j] >= wm )
+ {
+ wm = lasthorizontalw[j];
+ iin = lgth1-1; jin = j;
+ ijp[lgth1][lgth2] = -( lgth2 - j );
+ }
+ }
+ }
+
+ // Border cells: column 0 and row 0 get fixed jump values.
+ for( i=0; i<lgth1+1; i++ )
+ {
+ ijp[i][0] = i + 1;
+ }
+ for( j=0; j<lgth2+1; j++ )
+ {
+ ijp[0][j] = -( j + 1 );
+ }
+
+ // Start at the terminating NUL of each table and grow towards the front.
+ gaptable1 = gt1bk + lgth1+lgth2;
+ *gaptable1 = 0;
+ gaptable2 = gt2bk + lgth1+lgth2;
+ *gaptable2 = 0;
+
+ iin = lgth1; jin = lgth2;
+ limk = lgth1+lgth2 + 1;
+ for( k=0; k<limk; k++ )
+ {
+ // Decode ijp[iin][jin] into the previous cell (ifi, jfi):
+ //   >= warpbase : looked up in warpis/warpjs
+ //   <  0        : one row up, -ijp columns to the left
+ //   >  0        : ijp rows up, one column to the left
+ //   == 0        : diagonal step
+ if( ijp[iin][jin] >= warpbase )
+ {
+ ifi = warpis[ijp[iin][jin]-warpbase];
+ jfi = warpjs[ijp[iin][jin]-warpbase];
+ }
+ else if( ijp[iin][jin] < 0 )
+ {
+ ifi = iin-1; jfi = jin+ijp[iin][jin];
+ }
+ else if( ijp[iin][jin] > 0 )
+ {
+ ifi = iin-ijp[iin][jin]; jfi = jin-1;
+ }
+ else
+ {
+ ifi = iin-1; jfi = jin-1;
+ }
+
+ if( ifi == -warpbase && jfi == -warpbase )
+ {
+ // Special previous cell (-warpbase, -warpbase): emit the remaining
+ // iin columns of group 1 and jin columns of group 2 as unpaired
+ // columns and stop.
+ l = iin;
+ while( --l >= 0 )
+ {
+ *--gaptable1 = 'o';
+ *--gaptable2 = '-';
+ k++;
+ }
+ l= jin;
+ while( --l >= 0 )
+ {
+ *--gaptable1 = '-';
+ *--gaptable2 = 'o';
+ }
+ break;
+ }
+ else
+ {
+ // A jump of more than one row (column) becomes a run of gaps in
+ // group 2 (group 1).
+ l = iin - ifi;
+ while( --l )
+ {
+ *--gaptable1 = 'o';
+ *--gaptable2 = '-';
+ k++;
+ }
+ l= jin - jfi;
+ while( --l )
+ {
+ *--gaptable1 = '-';
+ *--gaptable2 = 'o';
+ k++;
+ }
+ }
+ if( iin <= 0 || jin <= 0 ) break;
+ // Matched column pair.
+ *--gaptable1 = 'o';
+ *--gaptable2 = 'o';
+ k++;
+ iin = ifi; jin = jfi;
+ }
+
+ // NOTE(review): gapireru() is defined elsewhere; presumably it copies the
+ // source sequence into the output while inserting gaps per the table.
+ for( i=0; i<icyc; i++ ) gapireru( mseq1[i], seq1[i], gaptable1 );
+ for( j=0; j<jcyc; j++ ) gapireru( mseq2[j], seq2[j], gaptable2 );
+
+ free( gt1bk );
+ free( gt2bk );
+
+ return( 0.0 );
+}
+
+// Releases every gap-length table reachable from gaplens[0] .. gaplens[11].
+// Each gaplens[i] is walked as a NULL-terminated array of tables, and each
+// table as an array of row pointers terminated by the sentinel (Gaplen *)1.
+// Rows and tables are freed and their slots reset to NULL; the arrays
+// gaplens[i] themselves are left allocated.
+static void cleargaplens( Gaplen ****gaplens )
+{
+	int i, j, k;
+	Gaplen **table;
+	for( i=0; i<12; i++ )
+	{
+		for( j=0; (table=gaplens[i][j])!=NULL; j++ )
+		{
+			for( k=0; table[k]!=(Gaplen *)1; k++ )
+			{
+				if( table[k] ) free( table[k] );
+				table[k] = NULL;
+			}
+			free( table );
+			gaplens[i][j] = NULL;
+		}
+	}
+}
+
+#if USEGAPLENHALFORMTX
+// Same clean-up as FreeGaplenMtx(), but reports every slot through
+// reporterr() before handling it. Slots are visited until the sentinel
+// (Gaplen *)1; non-NULL rows are freed and every visited slot is set to NULL.
+static void FreeGaplenMtxReport( Gaplen **mtx )
+{
+	int row = 0;
+	if( mtx == NULL ) return;
+
+	while( 1 )
+	{
+		reporterr( "i=%d, mtx[i] = %p\n", row, mtx[row] );
+		if( mtx[row] == (Gaplen *)1 ) break;
+		if( mtx[row] != NULL ) free( mtx[row] );
+		mtx[row] = NULL;
+		row++;
+	}
+	free( mtx );
+}
+#endif
+
+// Frees a gap-length table: every non-NULL row up to the sentinel
+// (Gaplen *)1 is freed and its slot set to NULL, then the row array itself
+// is freed. A NULL table is ignored.
+// inclfreq is accepted for interface compatibility and is not used.
+static void FreeGaplenMtx( Gaplen **mtx, int inclfreq )
+{
+	Gaplen **row;
+	if( mtx == NULL ) return;
+
+	for( row=mtx; *row!=(Gaplen *)1; row++ )
+	{
+		if( *row == NULL ) continue;
+		free( *row );
+		*row = NULL;
+	}
+	free( mtx );
+}
+
+#if USEGAPLENHALFORMTX
+// Frees a NULL-terminated array of gap-length tables, reporting each table
+// pointer through reporterr() before it is released with FreeGaplenMtx().
+// A NULL array is ignored.
+static void FreeGaplenCubgaplenReport( Gaplen ***cub )
+{
+	Gaplen ***slot;
+	int idx;
+	if( cub == NULL ) return;
+
+	for( slot=cub, idx=0; *slot!=NULL; slot++, idx++ )
+	{
+		reporterr( "i=%d, cub[i]=%p\n", idx, *slot );
+		FreeGaplenMtx( *slot, 0 );
+		*slot = NULL;
+	}
+	free( cub );
+}
+#endif
+
+// Frees a NULL-terminated array of gap-length tables: each table is released
+// with FreeGaplenMtx() and its slot cleared, then the array itself is freed.
+// A NULL array is ignored.
+static void FreeGaplenCub( Gaplen ***cub )
+{
+	int idx = 0;
+	if( cub == NULL ) return;
+
+	while( cub[idx] != NULL )
+	{
+		FreeGaplenMtx( cub[idx], 0 );
+		cub[idx] = NULL;
+		idx++;
+	}
+	free( cub );
+}
+
+// Counts the '-' characters that immediately precede position s, scanning
+// backwards and never going below first.
+// Returns that count (0 when the preceding character is not '-' or when
+// s == first), or -1 when s already lies before first.
+static int strralpha( const char *s, const char *first )
+{
+	const char *p;
+	int ngap = 0;
+
+	for( p=s-1; p>=first; p-- )
+	{
+		if( *p != '-' ) return( ngap );
+		ngap++;
+	}
+	// The scan ran off the front: valid only if it stopped right before first.
+	if( p == first-1 ) return( ngap );
+	return( -1 );
+}
+
+// Propagates every gap entry found at a position i backwards over the len
+// positions that precede it.
+//
+// mtx : per-position arrays of Gaplen entries, each array terminated by an
+//       entry whose len is -1; mtx[i] may be NULL
+// l   : last position index to process (positions 0..l are visited)
+//
+// For an entry n at position i with length len, positions i-1, i-2, ...,
+// i-len each receive a new entry appended to their array with:
+//   relend   = distance from that position to i
+//   len,freq = copied from the source entry
+//   idatnext = index of the linked entry at the next position (pos+1)
+// and the linked entry at pos+1 gets idatprev = index of the new entry.
+// Element [0] of every array keeps the number of stored entries in npat.
+static void fillgaplen( Gaplen **mtx, int l )
+{
+ int i, j, n, k, len, pos, idatnext;
+ double freq;
+ for( i=0; i<=l; i++ )
+ {
+// reporterr( "i=%d\n", i );
+ if( mtx[i] == NULL ) continue;
+ for( n=0; (len=mtx[i][n].len)!=-1; n++ )
+ {
+ freq = mtx[i][n].freq;
+ idatnext = n; // the chain starts at the source entry itself
+ for( j=0; j<len; j++ )
+ {
+// reporterr( "n=%d, j=%d, i=%d, len=%d\n", n, j, i, len );
+ pos = i-1-j;
+// reporterr( "pos = %d\n", pos );
+ if( mtx[pos] == NULL )
+ {
+ // First entry at this position: start with a lone terminator.
+ mtx[pos] = calloc( 2, sizeof( Gaplen ) );
+ mtx[pos][0].len = -1;
+#if USEGAPLENHALFORMTX
+ mtx[pos][0].idatend = -1;
+#endif
+ mtx[pos][0].idatnext = -1;
+ mtx[pos][0].idatprev = -1;
+ mtx[pos][0].relend = -1;
+ mtx[pos][0].freq = 0.0;
+ mtx[pos][0].npat = -1;
+ k = 0;
+ }
+ else
+ {
+ k = mtx[pos][0].npat; // append after the existing entries
+ }
+ // Room for the new entry k and the terminator k+1.
+ mtx[pos] = realloc( mtx[pos], sizeof( Gaplen ) * ( k + 2 ) );
+// mtx[pos][k].len = -100000; // not used!
+ mtx[pos][k].len = len; // used in compact mode!
+ mtx[pos][k].relend = j+1;
+ mtx[pos][k].freq = freq;// not used! mtx[i][n].freq;
+ mtx[pos][k].idatnext = idatnext;
+ mtx[pos][k].idatprev =-1;
+ mtx[pos][k].npat = -1;
+ mtx[pos][k+1].len = -1;
+ mtx[pos][k+1].idatnext = -1;
+ mtx[pos][k+1].relend = -1;
+ mtx[pos][k+1].freq = 0.0;
+ mtx[pos][k+1].npat = -1;
+ mtx[pos][0].npat = k+1;
+#if USEGAPLENHALFORMTX
+ mtx[pos][k].idatend = n;
+ mtx[pos][k+1].idatend = -1;
+#endif
+
+ mtx[pos+1][idatnext].idatprev = k; // does it always exist?
+
+ idatnext = k;
+ }
+ }
+ }
+}
+
+// Returns, summed over columns 0..l, the number of DISTINCT lengths of the
+// gap runs that end immediately before a non-gap character in that column.
+//
+// n   : number of sequences
+// l   : last column index to examine (column l is included)
+// seq : the n sequences; seq[i][j] is read for j = 0..l
+//
+// The length of the run preceding seq[i][j] is obtained from strralpha().
+// Each length is counted once per column: the known[] flags record which
+// lengths were already seen in the current column, the same bookkeeping
+// gaplencount() uses.
+static int gapvariety( int n, int l, char **seq )
+{
+	int i, j, gl, *known, nknown, val;
+	known = calloc( l+1, sizeof( int ) );
+	if( known == NULL )
+	{
+		reporterr( "Cannot allocate known in gapvariety\n" );
+		exit( 1 );
+	}
+//	for( i=0; i<n; i++ ) reporterr( "seq[%d] = %s\n", i, seq[i] );
+
+	val = 0;
+	for( j=0; j<=l; j++ )
+	{
+		// A run ending before column j is at most j long, and known[j]
+		// cannot have been set by an earlier column, so clearing
+		// known[0..j-1] is enough.
+		for( i=0; i<j; i++ ) known[i] = 0;
+		nknown = 0;
+		for( i=0; i<n; i++ )
+		{
+			if( seq[i][j] == '-' ) continue;
+
+			gl = strralpha( seq[i]+j, seq[i] );
+//			reporterr( "gl = %d\n", gl );
+			if( gl > 0 && !known[gl] )
+			{
+				known[gl] = 1; // remember this length for the current column
+				nknown++;
+			}
+		}
+		val += nknown;
+	}
+	free( known );
+
+	return( val );
+}
+
+
+// Rebuilds the gap-length table mtx for a group of sequences.
+//
+// n   : number of sequences
+// l   : last column index (columns 0..l are processed)
+// mtx : table to rebuild; rows 0..l are freed and reset first
+// seq : the n sequences
+// eff : per-sequence weights added into the freq fields
+//
+// For every column j and every sequence with a non-gap character there, the
+// length gl of the gap run ending just before column j is taken from
+// strralpha(). Each distinct gl > 0 gets one entry in mtx[j] (len = gl,
+// relend = 0) whose freq is the sum of eff[i] over the sequences sharing
+// that length. Finally fillgaplen() extends the entries backwards.
+// Exits with an error message if a length flagged as known has no entry.
+static void gaplencount( int n, int l, Gaplen **mtx, char **seq, double *eff )
+{
+ int i, j, k, gl, *known, nknown;
+ known = calloc( l+1, sizeof( int ) );
+// for( i=0; i<n; i++ ) reporterr( "seq[%d] = %s\n", i, seq[i] );
+
+ // Discard the previous contents of the table (stop at the sentinel).
+ for( j=0; j<=l; j++ )
+ {
+ if( mtx[j] )
+ {
+ if( mtx[j] == (Gaplen *)1 ) break;
+ // NOTE(review): with the #if 0 block compiled out this loop has an
+ // empty body; it only walks the entries whose relend is 0.
+ for( k=0; mtx[j][k].relend==0; k++ )
+ {
+#if 0
+// reporterr( "j=%d\n", j );
+// reporterr( "Free! freq\n" );
+ if( mtx[j][k].freq )
+ {
+ free( mtx[j][k].freq );
+ }
+ mtx[j][k].freq = NULL;
+#endif
+ }
+ free( mtx[j] );
+ }
+ mtx[j] = NULL;
+ }
+
+ for( j=0; j<=l; j++ )
+ {
+ // known[gl] != 0 means length gl already has an entry in mtx[j].
+ for( i=0; i<j; i++ ) known[i] = 0;
+ nknown = 0;
+ for( i=0; i<n; i++ )
+ {
+ if( seq[i][j] == '-' ) continue;
+
+ gl = strralpha( seq[i]+j, seq[i] );
+// reporterr( "gl = %d\n", gl );
+ if( gl > 0 )
+ {
+ if( known[gl] )
+ {
+// reporterr( "gl=%d, Known!\n", gl );
+ // Find the existing entry for this length and add the weight.
+ for( k=0; mtx[j][k].len!=-1; k++ ) if( mtx[j][k].len == gl ) break;
+ if( mtx[j][k].len == -1 )
+ {
+ reporterr( "Unexpected error!\n" );
+ exit( 1 );
+ }
+ mtx[j][k].freq += eff[i];
+ }
+ else
+ {
+// reporterr( "gl=%d, First!\n", gl );
+ // Append a new entry followed by a fresh terminator entry.
+ mtx[j] = realloc( mtx[j], sizeof( Gaplen ) * (nknown+2) );
+ mtx[j][nknown].len = gl;
+ mtx[j][nknown].relend = 0;
+ mtx[j][nknown].freq = eff[i];
+ mtx[j][nknown].idatnext = -2;
+ mtx[j][nknown+1].len = -1;
+ mtx[j][nknown+1].idatnext = -1;
+ mtx[j][nknown+1].relend = -1;
+ mtx[j][nknown+1].freq = 0.0;
+ mtx[j][nknown+1].npat = -1;
+#if USEGAPLENHALFORMTX
+ mtx[j][nknown].idatend = nknown;
+ mtx[j][nknown+1].idatend = -1;
+#endif
+ known[gl]++;
+ nknown++;
+ mtx[j][0].npat = nknown; // entry count is kept in element [0]
+ }
+ }
+ }
+ }
+ fillgaplen( mtx, l );
+#if 0
+ reporterr( "Gaplen:\n" );
+ for( i=0; i<=l; i++ )
+ {
+// reporterr( "i=%d, gaplen[i] = %p\n", i, mtx[i] );
+ if( mtx[i] )
+ {
+ for( j=0; mtx[i][j].len!=-1; j++ )
+ reporterr( "i=%d, len = %d, relend = %d, freq = %f\n", i, mtx[i][j].len, mtx[i][j].relend, mtx[i][j].freq );
+ }
+ }
+
+#endif
+
+ free( known );
+}
+
+
+#if DEBUG
+// Debug helper: prints every entry of a gap-length table through reporterr().
+// Rows are visited until the sentinel (Gaplen *)1; NULL rows are skipped and
+// each row is read up to the entry whose idatnext is -1.
+// seqlen is not used.
+static void showgaplen( Gaplen **mtx, int seqlen )
+{
+	int rowno, ent;
+	Gaplen *row, *g;
+
+	for( rowno=0; (row=mtx[rowno])!=(Gaplen *)1; rowno++ )
+	{
+		if( row == NULL ) continue;
+		for( ent=0; (g=row+ent)->idatnext!=-1; ent++ )
+		{
+#if USEGAPLENHALFORMTX
+			reporterr( "i=%d, l=%d, len=%d, relend=%d, idatend=%d, idatnext=%d, idatprev=%d, freq=%f\n", rowno, ent, g->len, g->relend, g->idatend, g->idatnext, g->idatprev, g->freq );
+			// The row this entry points at must still exist.
+			if( mtx[rowno+g->relend] == NULL )
+				reporterr( ".len and .freq were lost when i=%d!\n", rowno );
+#else
+			reporterr( "i=%d, l=%d, len=%d, relend=%d, idatnext=%d, idatprev=%d, freq=%f\n", rowno, ent, g->len, g->relend, g->idatnext, g->idatprev, g->freq );
+#endif
+		}
+	}
+}
+#endif
+
+#if WMCHECK
+// Counts the gap runs (maximal stretches of '-') in a pair of aligned
+// sequences after the columns have been processed by commongappick().
+//
+// s1, s2 : the two sequences; they are copied, so the inputs are untouched
+//
+// Returns the number of gap runs in the first copy plus the number in the
+// second copy.
+static int pairgapcount( char *s1, char *s2 )
+{
+	char **tmpseq;
+	int i, len, st, k;
+	int v = 0;
+
+	tmpseq = calloc( 2, sizeof( char * ) );
+	if( tmpseq == NULL )
+	{
+		reporterr( "Cannot allocate tmpseq in pairgapcount\n" );
+		exit( 1 );
+	}
+	// Each copy is sized from its own source so that strcpy() cannot
+	// overflow when the two inputs differ in length.
+	tmpseq[0] = malloc( sizeof( char ) * ( strlen( s1 ) + 1 ) );
+	tmpseq[1] = malloc( sizeof( char ) * ( strlen( s2 ) + 1 ) );
+	if( tmpseq[0] == NULL || tmpseq[1] == NULL )
+	{
+		reporterr( "Cannot allocate tmpseq in pairgapcount\n" );
+		exit( 1 );
+	}
+
+	strcpy( tmpseq[0], s1 );
+	strcpy( tmpseq[1], s2 );
+
+	commongappick( 2, tmpseq );
+	len = strlen( tmpseq[0] );
+
+	for( k=0; k<2; k++ )
+	{
+		st = 0; // 1 while inside a gap run
+		// Stop at the terminator too, in case this copy is the shorter one.
+		for( i=0; i<len && tmpseq[k][i]; i++ )
+		{
+			if( tmpseq[k][i] == '-' )
+			{
+				if( st == 0 )
+				{
+					v++;
+					st = 1;
+				}
+			}
+			else
+			{
+				st = 0;
+			}
+		}
+	}
+	free( tmpseq[0] );
+	free( tmpseq[1] );
+	free( tmpseq );
+
+	return( v );
+}
+#endif
+
+// Combines the gap entries stored at position i of gaplen1 with those at
+// position j of gaplen2 into a single weight, for a gap of newgaplen that is
+// being considered. len, relend and freq are read directly from the entries
+// at i and j; each list ends at the entry whose idatnext is -1, and a NULL
+// list is treated as empty.
+//
+// The result is the sum of three parts:
+//   1. entries of gaplen1 with relend != 0, each weighted by the total freq
+//      of the gaplen2 entries with relend == 0 whose len exceeds
+//      len1 - relend1 + newgaplen;
+//   2. entries of gaplen1 with relend == 0, each weighted by 1 minus the
+//      freq of the gaplen2 entries selected against len1 + newgaplen;
+//   3. the remaining weight of gaplen1 (1 - sum of all its freq values)
+//      times 1 minus the freq of the gaplen2 entries selected against
+//      newgaplen alone.
+// seq1, seq2 and disp are only used by the DEBUG output.
+static double calcpfac_gap_noidatend( Gaplen **gaplen1, Gaplen **gaplen2, int newgaplen, int i, int j, char *seq1, char *seq2, int disp ) // seq1 and seq2 are for debugging
+{
+	const Gaplen *row2 = gaplen2[j];
+	const Gaplen *row1 = gaplen1[i];
+	double total = 0.0;	// running result
+	double fcont = 0.0;	// freq of gaplen1 entries with relend != 0
+	double fend = 0.0;	// freq of gaplen1 entries with relend == 0
+	double part, rest;
+	int a, b, reach;
+
+	if( row1 != NULL )
+	{
+		for( a=0; row1[a].idatnext!=-1; a++ )
+		{
+			if( row1[a].relend != 0 )
+			{
+				part = 0.0;
+				if( row2 != NULL )
+				{
+					reach = row1[a].len - row1[a].relend + newgaplen;
+					for( b=0; row2[b].idatnext!=-1; b++ )
+					{
+						if( row2[b].relend == 0 && row2[b].len > reach )
+							part += row2[b].freq;
+					}
+				}
+				total += part * row1[a].freq;
+				fcont += row1[a].freq;
+			}
+			else
+			{
+				part = 1.0;
+				if( row2 != NULL )
+				{
+					reach = row1[a].len + newgaplen;
+					for( b=0; row2[b].idatnext!=-1; b++ )
+					{
+						if( row2[b].relend == 0 )
+						{
+							// gap that ends here
+							if( row2[b].len == reach ) part -= row2[b].freq;
+						}
+						else if( row2[b].len - (row2[b].relend-1) > reach )
+						{
+							// gap that continues
+							part -= row2[b].freq;
+						}
+					}
+				}
+				total += part * row1[a].freq;
+				fend += row1[a].freq;
+			}
+		}
+	}
+#if DEBUG
+	reporterr( "pfac1 (step2) = %f\n", fcont );
+	reporterr( "pfac10 (step2) = %f\n", fend );
+	reporterr( "pfac (step2) = %f\n", total );
+#endif
+
+	rest = 1.0 - fcont - fend;
+	part = 1.0;
+	if( row2 != NULL )
+	{
+		for( b=0; row2[b].idatnext!=-1; b++ )
+		{
+			if( row2[b].relend == 0 )
+			{
+				// gap that ends here
+				if( row2[b].len == newgaplen ) part -= row2[b].freq;
+			}
+			else if( row2[b].len - (row2[b].relend-1) > newgaplen )
+			{
+				// gap that continues
+				part -= row2[b].freq;
+			}
+		}
+	}
+#if DEBUG
+	reporterr( "pfac1 (type3) = %f\n", rest );
+	reporterr( "pfac2 (type3) = %f\n", part );
+	reporterr( "pfac (step3) = %f\n", total );
+#endif
+	total += rest * part;
+#if DEBUG
+	reporterr( "incomplete pfac = %f, pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", total, seq1[i], i, seq2[j], j, rest, part );
+#endif
+
+	return( total );
+}
+
+#if USEGAPLENHALFORMTX
+
+// Variant of calcpfac_gap_noidatend() that reads len and freq from the entry
+// at the END of each gap: for an entry with relend = r and idatend = id found
+// at position p, the values come from gaplenX[p+r][id]. Each list ends at the
+// entry whose idatend is -1, and a NULL list is treated as empty.
+//
+// The result is the sum of three parts:
+//   1. entries of gaplen1 with relend != 0, each weighted by the total freq
+//      of the gaplen2 entries with relend == 0 whose len exceeds
+//      len1 - relend1 + newgaplen;
+//   2. entries of gaplen1 with relend == 0, each weighted by 1 minus the
+//      freq of the gaplen2 entries selected against len1 + newgaplen;
+//   3. the remaining weight of gaplen1 times 1 minus the freq of the gaplen2
+//      entries selected against newgaplen alone.
+// seq1, seq2 and disp are only used by the DEBUG output.
+static double calcpfac_gap_incomplete( Gaplen **gaplen1, Gaplen **gaplen2, int newgaplen, int i, int j, char *seq1, char *seq2, int disp ) // seq1 and seq2 are for debugging
+{
+	const Gaplen *row2 = gaplen2[j];
+	const Gaplen *row1 = gaplen1[i];
+	const Gaplen *end1, *end2;
+	double total = 0.0;	// running result
+	double fcont = 0.0;	// freq of gaplen1 entries with relend != 0
+	double fend = 0.0;	// freq of gaplen1 entries with relend == 0
+	double part, rest;
+	int a, b, rel1, rel2, id1, id2;
+
+	if( row1 != NULL )
+	{
+		for( a=0; (id1=row1[a].idatend)!=-1; a++ )
+		{
+			rel1 = row1[a].relend;
+			end1 = &gaplen1[i+rel1][id1];
+			if( rel1 != 0 )
+			{
+				part = 0.0;
+				if( row2 != NULL )
+				{
+					for( b=0; (id2=row2[b].idatend)!=-1; b++ )
+					{
+						rel2 = row2[b].relend;
+						end2 = &gaplen2[j+rel2][id2];
+						if( rel2 == 0 && end2->len > end1->len - (rel1) + newgaplen )
+							part += end2->freq;
+					}
+				}
+				total += part * end1->freq;
+				fcont += end1->freq;
+			}
+			else
+			{
+				part = 1.0;
+				if( row2 != NULL )
+				{
+					for( b=0; (id2=row2[b].idatend)!=-1; b++ )
+					{
+						rel2 = row2[b].relend;
+						end2 = &gaplen2[j+rel2][id2];
+						if( rel2 == 0 )
+						{
+							// gap that ends here
+							if( end2->len == end1->len+newgaplen ) part -= end2->freq;
+						}
+						else if( end2->len - (rel2-1) > end1->len+newgaplen )
+						{
+							// gap that continues
+							part -= end2->freq;
+						}
+					}
+				}
+				total += part * end1->freq;
+				fend += end1->freq;
+			}
+		}
+	}
+#if DEBUG
+	reporterr( "pfac1 (step2) = %f\n", fcont );
+	reporterr( "pfac10 (step2) = %f\n", fend );
+	reporterr( "pfac (step2) = %f\n", total );
+#endif
+
+	rest = 1.0 - fcont - fend;
+	part = 1.0;
+	if( row2 != NULL )
+	{
+		for( b=0; (id2=row2[b].idatend)!=-1; b++ )
+		{
+			rel2 = row2[b].relend;
+			end2 = &gaplen2[j+rel2][id2];
+			if( rel2 == 0 )
+			{
+				// gap that ends here
+				if( end2->len == newgaplen ) part -= end2->freq;
+			}
+			else if( end2->len - (rel2-1) > newgaplen )
+			{
+				// gap that continues
+				part -= end2->freq;
+			}
+		}
+	}
+#if DEBUG
+	reporterr( "pfac1 (type3) = %f\n", rest );
+	reporterr( "pfac2 (type3) = %f\n", part );
+	reporterr( "pfac (step3) = %f\n", total );
+#endif
+	total += rest * part;
+#if DEBUG
+	reporterr( "incomplete pfac = %f, pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", total, seq1[i], i, seq2[j], j, rest, part );
+#endif
+
+	return( total );
+}
+
+// Weight contributed by the gaps of gaplen2 that end exactly at position j
+// (relend == 0), given the gap entries at position i of gaplen1 and a gap of
+// newgaplen that is being considered.
+//
+// len and freq are read from the entry at the end of each gap
+// (gaplenX[pos+relend][idatend]); each list ends at the entry whose idatend
+// is -1 and a NULL list is treated as empty.
+//
+// For every such gap of gaplen2:
+//   keep  = 1 - freq of the gaplen1 entries with
+//               newgaplen + len1 - relend1 > len2
+//   spare = 1 - freq of all gaplen1 entries
+//   result += keep * freq2, and additionally
+//   result -= spare * freq2 when newgaplen >= len2 + 1.
+// seq1, seq2 and disp are only used by the DEBUG output.
+static double calcpfac_gapex( Gaplen **gaplen1, Gaplen **gaplen2, int i, int j, int newgaplen, char *seq1, char *seq2, int disp )
+{
+	const Gaplen *row1 = gaplen1[i];
+	const Gaplen *row2 = gaplen2[j];
+	const Gaplen *end1, *end2;
+	double total = 0.0;
+	double keep, spare;
+	int a, b, id1, id2, rel1;
+
+	if( row2 == NULL ) return( total );
+
+	for( b=0; (id2=row2[b].idatend)!=-1; b++ ) // too slow! switch to a hash later
+	{
+#if DEBUG
+		if( disp )
+		{
+			int dbgrel = row2[b].relend;
+			reporterr( "gaplen2[][].len=%d, .relend=%d, .freq=%f\n", gaplen2[j+dbgrel][id2].len, gaplen2[j][b].relend, gaplen2[j+dbgrel][id2].freq );
+			reporterr( "gl = %d, newgaplen=%d\n", gaplen2[j+dbgrel][id2].len, newgaplen );
+		}
+#endif
+		// Only gaps that end at position j are considered.
+		if( row2[b].relend != 0 ) continue;
+
+		end2 = row2 + id2;	// relend is 0, so the end entry is in this row
+		keep = 1.0;
+		spare = 1.0;
+		if( row1 != NULL )
+		{
+			for( a=0; (id1=row1[a].idatend)!=-1; a++ ) // too slow! switch to a hash later
+			{
+				rel1 = row1[a].relend;
+				end1 = &gaplen1[i+rel1][id1];
+				spare -= end1->freq;
+#if DEBUG
+				if( disp ) reporterr( "gaplen1[][].len=%d, .relend=%d, .freq=%f\n", gaplen1[i+rel1][id1].len, gaplen1[i][a].relend, gaplen1[i+rel1][id1].freq );
+#endif
+				if( newgaplen + end1->len - (rel1) > end2->len ) keep -= end1->freq;
+			}
+		}
+		total += keep * end2->freq;
+
+		// NOTE(review): the original author marked this correction as
+		// uncertain ("????", ">= or >??").
+		if( newgaplen >= end2->len + 1 )
+		{
+			total -= spare * end2->freq;
+		}
+	}
+
+	return( total );
+}
+
+// Combines the gap entries at position i of gaplen1 and position j of
+// gaplen2 into a single weight.
+//
+// len and freq are read from the entry at the end of each gap
+// (gaplenX[pos+relend][idatend]); each list ends at the entry whose idatend
+// is -1 and a NULL list is treated as empty.
+//
+// With f1 (f2) the total freq of the gaps that end exactly at i (j), the
+// start value is f1*f2 + f1*(1-f2) + f2*(1-f1). From it the product
+// freq1*freq2 is subtracted for every pair of entries where
+//   - both gaps end here and have the same len, or
+//   - one gap ends here and the other one, still open, satisfies
+//     len - (relend-1) > len of the ending gap.
+// seq1, seq2 and disp are only used by the DEBUG output.
+static double calcpfac( Gaplen **gaplen1, Gaplen **gaplen2, int i, int j, char *seq1, char *seq2, int disp ) // seq1 and seq2 are for debugging
+{
+	const Gaplen *row1 = gaplen1[i];
+	const Gaplen *row2 = gaplen2[j];
+	const Gaplen *end1, *end2;
+	double total;
+	double fend1 = 0.0;	// freq of the gaplen1 gaps ending at i
+	double fend2 = 0.0;	// freq of the gaplen2 gaps ending at j
+	int a, b, rel1, rel2, id1, id2;
+
+#if DEBUG
+	if( disp )
+	{
+		reporterr( "seq1[0] = %s\n", seq1 );
+		reporterr( "seq2[0] = %s\n", seq2 );
+		reporterr( "i,j=%d,%d\n", i, j );
+
+		reporterr( "In calcpfac(), gaplen1[%d(%c)] = \n", i, seq1[i] );
+		for( a=0; gaplen1[i]&&(id1=gaplen1[i][a].idatend)!=-1; a++ )
+		{
+			rel1 = gaplen1[i][a].relend;
+			reporterr( "pos1=%d, id1=%d\n", rel1, id1 );
+			reporterr( ".len=%d, .relend=%d, .freq=%f\n", gaplen1[i+rel1][id1].len, gaplen1[i][a].relend, gaplen1[i+rel1][id1].freq );
+		}
+
+		reporterr( "In calcpfac(), gaplen2[%d(%c)] = \n", j, seq2[j] );
+		for( a=0; gaplen2[j]&&(id2=gaplen2[j][a].idatend)!=-1; a++ )
+		{
+			rel2 = gaplen2[j][a].relend;
+			reporterr( "j=%d, k=%d, id2=%d, pos2=%d\n", j, a, id2, rel2 );
+			reporterr( ".len=%d, .relend=%d\n", gaplen2[j+rel2][id2].len, gaplen2[j][a].relend );
+			reporterr( ".freq=%f\n", gaplen2[j+rel2][id2].freq );
+		}
+	}
+#endif
+
+	// Total weight of the gaps that end exactly here, for each side.
+	if( row1 != NULL )
+	{
+		for( a=0; (id1=row1[a].idatend)!=-1; a++ )
+			if( row1[a].relend == 0 ) fend1 += row1[id1].freq;
+	}
+	if( row2 != NULL )
+	{
+		for( b=0; (id2=row2[b].idatend)!=-1; b++ ) // too slow! switch to a hash later
+			if( row2[b].relend == 0 ) fend2 += row2[id2].freq;
+	}
+#if DEBUG
+	reporterr( "\n\nInitial pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", seq1[i], i, seq2[j], j, fend1, fend2 );
+#endif
+	total = fend1 * fend2 + fend1 * (1-fend2) + fend2 * (1-fend1);
+#if DEBUG
+	reporterr( "\n\nInitial pfac (%d,%d) = %f\n", i, j, total );
+#endif
+
+	// Remove the pairs described in the header comment.
+	if( row1 != NULL )
+	{
+		for( a=0; (id1=row1[a].idatend)!=-1; a++ ) // too slow! switch to a hash later
+		{
+			rel1 = row1[a].relend;
+			end1 = &gaplen1[i+rel1][id1];
+			if( row2 == NULL ) continue;
+			for( b=0; (id2=row2[b].idatend)!=-1; b++ ) // too slow! switch to a hash later
+			{
+				rel2 = row2[b].relend;
+				end2 = &gaplen2[j+rel2][id2];
+				if( rel1 == 0 && rel2 == 0 )
+				{
+					if( end1->len == end2->len ) total -= end1->freq * end2->freq;
+				}
+				else if( rel1 == 0 )
+				{
+					if( end2->len - (rel2-1) > end1->len ) total -= end1->freq * end2->freq;
+				}
+				else if( rel2 == 0 )
+				{
+					if( end1->len - (rel1-1) > end2->len ) total -= end1->freq * end2->freq;
+				}
+			}
+		}
+	}
+
+#if DEBUG
+	reporterr( "\n\nFinal pfac1,pfac2 (%c%d,%c%d, straight) = %f\n\n", seq1[i], i, seq2[j], j, total );
+#endif
+	return( total );
+}
+#endif
+
+// Weight contributed by the gaps of gaplen2 that end exactly at position j
+// (relend == 0), given the gap entries at position i of gaplen1 and a gap of
+// newgaplen that is being considered. len, relend and freq are read directly
+// from the entries at i and j; each list ends at the entry whose idatnext is
+// -1 and a NULL list is treated as empty.
+//
+// For every such gap of gaplen2:
+//   keep  = 1 - freq of the gaplen1 entries with
+//               newgaplen + len1 - relend1 > len2
+//   spare = 1 - freq of all gaplen1 entries
+//   result += keep * freq2, and additionally
+//   result -= spare * freq2 when newgaplen >= len2 + 1.
+// seq1, seq2 and disp are only used by the DEBUG output.
+static double calcpfac_gapex_noidatend( Gaplen **gaplen1, Gaplen **gaplen2, int i, int j, int newgaplen, char *seq1, char *seq2, int disp )
+{
+	const Gaplen *row1 = gaplen1[i];
+	const Gaplen *row2 = gaplen2[j];
+	double total = 0.0;
+	double keep, spare;
+	int a, b;
+
+	if( row2 == NULL ) return( total );
+
+	for( b=0; row2[b].idatnext!=-1; b++ )
+	{
+#if DEBUG
+		if( disp )
+		{
+			reporterr( "gaplen2[][].len=%d, .relend=%d, .freq=%f\n", gaplen2[j][b].len, gaplen2[j][b].relend, gaplen2[j][b].freq );
+			reporterr( "gl = %d, newgaplen=%d\n", row2[b].len, newgaplen );
+		}
+#endif
+		// Only gaps that end at position j are considered.
+		if( row2[b].relend != 0 ) continue;
+
+		keep = 1.0;
+		spare = 1.0;
+		if( row1 != NULL )
+		{
+			for( a=0; row1[a].idatnext!=-1; a++ )
+			{
+				spare -= row1[a].freq;
+#if DEBUG
+				if( disp ) reporterr( "gaplen1[][].len=%d, .relend=%d, .freq=%f\n", gaplen1[i][a].len, gaplen1[i][a].relend, gaplen1[i][a].freq );
+#endif
+				if( newgaplen + row1[a].len - row1[a].relend > row2[b].len ) keep -= row1[a].freq;
+			}
+		}
+		total += keep * row2[b].freq;
+
+		// NOTE(review): the original author marked this correction as
+		// uncertain ("????", ">= or >??").
+		if( newgaplen >= row2[b].len + 1 )
+		{
+			total -= spare * row2[b].freq;
+		}
+	}
+
+	return( total );
+}
+
+
+static double calcpfacnoidatend( Gaplen **gaplen1, Gaplen **gaplen2, int i, int j, char *seq1, char *seq2, int disp ) // seq1 and seq2 are for debugging only (they are read only inside the DEBUG blocks). Returns pfac computed from the records of gaplen1[i] and gaplen2[j].
+{
+ double pfac, pfac1, pfac2;
+ int k, l, pos1, pos2;
+ Gaplen *gaplen1i, *gaplen2j, *g1, *g2;
+
+ gaplen1i = gaplen1[i];
+ gaplen2j = gaplen2[j];
+
+#if DEBUG
+ if( disp )
+ {
+ reporterr( "seq1[0] = %s\n", seq1 );
+ reporterr( "seq2[0] = %s\n", seq2 );
+ reporterr( "i,j=%d,%d\n", i, j );
+
+ reporterr( "In calcpfacnoidatend(), gaplen1[%d(%c)] = \n", i, seq1[i] );
+ showgaplen( gaplen1, seqlen( seq1 ) );
+ for( k=0; gaplen1[i]&&gaplen1[i][k].idatnext!=-1; k++ )
+ {
+ pos1 = gaplen1[i][k].relend;
+ reporterr( ".len=%d, .relend=%d, .freq=%f (i=%d)\n", gaplen1[i][k].len, gaplen1[i][k].relend, gaplen1[i][k].freq, i );
+ }
+
+ reporterr( "In calcpfacnoidatend(), gaplen2[%d(%c)] = \n", j, seq2[j] );
+ showgaplen( gaplen2, seqlen( seq2 ) );
+ for( k=0; gaplen2[j]&&gaplen2[j][k].idatnext!=-1; k++ )
+ {
+ pos2 = gaplen2[j][k].relend;
+ reporterr( ".len=%d, .relend=%d (j=%d)\n", gaplen2[j][k].len, gaplen2[j][k].relend, j );
+ reporterr( ".freq=%f\n", gaplen2[j][k].freq );
+ }
+ }
+#endif
+
+#if 1 // active variant: walks the records through the pointers g1 and g2
+ pfac1 = pfac2 = 0.0; // pfac1 / pfac2: summed freq of the relend == 0 records of gaplen1[i] / gaplen2[j]
+ if( gaplen1i ) for( k=0; (g1=gaplen1i+k)->idatnext!=-1; k++ )
+ {
+ if( (pos1=g1->relend) == 0 ) pfac1 += g1->freq;
+ }
+
+ if( gaplen2j ) for( l=0; (g2=gaplen2j+l)->idatnext!=-1; l++ ) // too slow! switch to a hash later
+ {
+ if( (pos2=g2->relend) == 0 ) pfac2 += g2->freq;
+ }
+#if DEBUG
+ reporterr( "\n\nInitial pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", seq1[i], i, seq2[j], j, pfac1, pfac2 );
+#endif
+ pfac = pfac1 * pfac2 + pfac1 * (1-pfac2) + pfac2 * (1-pfac1); // algebraically pfac1 + pfac2 - pfac1*pfac2
+#if DEBUG
+ reporterr( "\n\nInitial pfac (%d,%d) = %f\n", i, j, pfac );
+#endif
+
+// if( pfac ) reporterr( "i,j=%d,%d, Cancel (eq len)? pfac = %f -> ", i, j, pfac );
+ if( gaplen1i ) for( k=0; (g1=gaplen1i+k)->idatnext!=-1; k++ ) // too slow! switch to a hash later
+ {
+ pos1=g1->relend;
+ if( gaplen2j ) for( l=0; (g2=gaplen2j+l)->idatnext!=-1; l++ ) // too slow! switch to a hash later
+ {
+ pos2 = gaplen2j[l].relend; // same record as g2
+ if ( pos1 == 0 && pos2 == 0 && g1->len == g2->len ) pfac -= g1->freq * g2->freq; // both relend == 0 and equal len
+ else if( pos1 == 0 && pos2 != 0 && g2->len - (pos2-1) > g1->len ) pfac -= g1->freq * g2->freq;
+ else if( pos1 != 0 && pos2 == 0 && g1->len - (pos1-1) > g2->len ) pfac -= g1->freq * g2->freq;
+ }
+ }
+
+#else // inactive variant: the same steps written with array indexing
+
+ pfac1 = pfac2 = 0.0;
+ if( gaplen1i ) for( k=0; (gaplen1i[k].idatnext)!=-1; k++ )
+ {
+ if( gaplen1i[k].relend == 0 ) pfac1 += gaplen1[i][k].freq;
+ }
+
+ if( gaplen2j ) for( l=0; (gaplen2j[l].idatnext)!=-1; l++ ) // too slow! switch to a hash later
+ {
+ if( gaplen2j[l].relend == 0 ) pfac2 += gaplen2[j][l].freq;
+ }
+#if DEBUG
+ reporterr( "\n\nInitial pfac1,pfac2 (%c%d,%c%d) = %f, %f\n", seq1[i], i, seq2[j], j, pfac1, pfac2 );
+#endif
+ pfac = pfac1 * pfac2 + pfac1 * (1-pfac2) + pfac2 * (1-pfac1);
+#if DEBUG
+ reporterr( "\n\nInitial pfac (%d,%d) = %f\n", i, j, pfac );
+#endif
+
+#if 1
+// if( pfac ) reporterr( "i,j=%d,%d, Cancel (eq len)? pfac = %f -> ", i, j, pfac );
+ if( gaplen1i ) for( k=0; (gaplen1i[k].idatnext)!=-1; k++ ) // too slow! switch to a hash later
+ {
+ pos1=gaplen1i[k].relend;
+ if( gaplen2j ) for( l=0; (gaplen2j[l].idatnext)!=-1; l++ ) // too slow! switch to a hash later
+ {
+ pos2 = gaplen2j[l].relend;
+ if ( pos1 == 0 && pos2 == 0 && gaplen1i[k].len == gaplen2j[l].len ) pfac -= gaplen1i[k].freq * gaplen2j[l].freq;
+ else if( pos1 == 0 && pos2 != 0 && gaplen2j[l].len - (pos2-1) > gaplen1i[k].len ) pfac -= gaplen1i[k].freq * gaplen2j[l].freq;
+ else if( pos1 != 0 && pos2 == 0 && gaplen1i[k].len - (pos1-1) > gaplen2j[l].len ) pfac -= gaplen1i[k].freq * gaplen2j[l].freq;
+ }
+ }
+#endif
+#endif
+
+#if DEBUG
+ reporterr( "\n\nFinal pfac1,pfac2 (%c%d,%c%d, straight) = %f\n\n", seq1[i], i, seq2[j], j, pfac );
+#endif
+ return( pfac );
+}
+
+
+static void extendgaplencompactx( Gaplen **cpy, Gaplen **orig, int start ) // Makes sure cpy[start] mirrors orig[start] (copying the row if cpy has none), then pulls len values over from cpy[start-1] through the idatprev links.
+{
+ Gaplen *opt, *cpt;
+ int l, id;
+#if DEBUG
+ Gaplen cpybk;
+#endif
+
+// if( start < 0 ) start = 0;
+
+ if( orig[start] == NULL ) // no row in orig: drop the row in cpy as well and stop
+ {
+ if( cpy[start] )
+ {
+ free( cpy[start] );
+ cpy[start] = NULL;
+ }
+ return;
+ }
+
+
+#if DEBUG
+ reporterr( "At first, cpy -> \n" );
+ showgaplen( cpy, 100 );
+ reporterr( "Look at %d \n", start );
+#endif
+
+ if( cpy[start] == NULL ) // row missing in cpy: allocate it and copy it from orig
+ {
+ l = orig[start][0].npat; // the first record's npat is used as the record count
+
+ cpy[start] = realloc( cpy[start], (l+2) * sizeof( Gaplen ) ); // cpy[start] is NULL here, so this allocates a fresh block. NOTE(review): the result is not checked for NULL
+
+#if 0
+ for( l=0; (gl=orig[start][l].idatend)!=-1; l++ )
+ cpy[start][l] = orig[start][l]; // freq is copied as a pointer
+ cpy[start][l] = orig[start][l]; // does this work?
+#else
+ for( opt = orig[start],cpt = cpy[start]; opt->idatnext!=-1; ) // copy every record up to the entry with idatnext == -1 ...
+ *cpt++ = *opt++;
+ *cpt = *opt; // ... and then that terminating entry itself
+#endif
+ }
+
+#if DEBUG
+ cpybk = cpy[start][0];
+#endif
+
+#if 0
+ for( l=0; (opt=orig[start]+l)->idatend!=-1; l++ )
+ {
+ if( (pos=opt->relend) == 0 ) continue;
+
+ if( cpy[posplus=start+pos] != NULL )
+ {
+ id = opt->idatend;
+// reporterr( "cpy[%d][%d].len: %d -> %d (relend=%d)\n", start, l, cpy[start][l].len, cpy[posplus][id].len, pos );
+ cpy[start][l].len = cpy[posplus][id].len; // Change later so that posplus is not used.
+ continue; // NECESSARY!!!
+ }
+ else
+ {
+// reporterr( "cpy[%d][%d].len: %d (relend=%d)\n", start, l, cpy[start][l].len, pos );
+ }
+
+#if 0
+ for( k=0; orig[start+pos][k].idatend!=-1; k++ )
+ ;
+#else
+ optplus = orig[posplus];
+ k = optplus->npat;
+#endif
+
+
+ cptplus = cpy[posplus] = realloc( cpy[posplus], (k+2) * sizeof( Gaplen ) );
+// cptplus = realloc( cptplus, (k+2) * sizeof( Gaplen ) );
+
+#if 0
+ for( k=0; optplus[k].idatend!=-1; k++ )
+ {
+ cptplus[k] = optplus[k]; // does this work?
+ }
+ cptplus[k] = optplus[k]; // does this work?
+#else
+ while( optplus->idatend!=-1 ) *cptplus++ = *optplus++;
+ *cptplus = *optplus;
+#endif
+ }
+#endif
+
+
+ if( start == 0 ) return; // there is no previous row to link to
+ if( cpy[start-1] == NULL ) return;
+
+#if DEBUG
+ reporterr( "cpy -> \n" );
+ showgaplen( cpy, 100 );
+ reporterr( "Look at %d \n", start );
+#endif
+
+ for( l=0; orig[start][l].idatnext!=-1; l++ ) // for every record that has a predecessor link (idatprev != -1) ...
+ {
+ if( (id=orig[start][l].idatprev) == -1 ) continue;
+
+// if( cpy[start][l].relend != 0 ) cpy[start][l].len = cpy[start-1][id].len; // carefully
+ cpy[start][l].len = cpy[start-1][id].len; // carefully; takes the len of the linked record in the previous row of cpy
+
+// if( cpy[start][l].len != cpy[start-1][id].len )
+#if DEBUG
+ if( 1 || cpy[start][l].len != cpy[start-1][id].len )
+ {
+ reporterr( "Check!! cpy[%d][%d].len=%d, but [start-1][].len=%d, relend=%d\n", start, l, cpy[start][l].len, cpy[start-1][id].len, cpy[start][l].relend );
+ reporterr( "orig[%d][%d].len=%d, relend=%d\n", start, l, orig[start][l].len, orig[start][l].relend );
+ reporterr( "cpybk.len=%d, relend=%d\n", cpybk.len, cpybk.relend );
+
+ }
+ else
+ {
+// reporterr( "OK, cpy[%d][%d].len=%d, relend=%d\n", start, l, cpy[start][l].len, cpy[start][l].relend );
+ }
+#endif
+ }
+
+}
+
+
+#if USEGAPLENHALFORMTX
+static void extendgaplenpartly( Gaplen **cpy, Gaplen **orig, int start, int end ) // Fills the still-empty rows of cpy in [start, end + extrascope] with copies of the matching rows of orig; rows cpy already holds are left as they are.
+{
+ int i, l, gl, extrascope;
+ Gaplen *pt;
+
+ if( start < 0 ) start = 0;
+// for( i=start; i<=end; i++ )
+// {
+// if( cpy[i] == (Gaplen *)1 )
+// {
+// end = i-1;
+// break;
+//// reporterr( "Okashii! i=%d\n", i );
+//// exit( 1 );
+// }
+// if( cpy[i] ) free( cpy[i] );
+// cpy[i] = NULL;
+// }
+
+
+ extrascope = 0;
+#if 0
+ for( i=start; i<=end; i++ ) if( orig[i] )
+ {
+ for( pt=orig[i]; (pt->idatend)!=-1; )
+ {
+ if( (gl=pt++->relend) > extrascope ) extrascope = i+gl-end+1;
+ }
+// extrascope = 10; // approximation
+
+ }
+#else
+ if( orig[end] ) // extrascope = largest relend among the records of orig[end] (list ends at idatend == -1)
+ {
+ for( pt=orig[end]; (pt->idatend)!=-1; )
+ {
+ if( (gl=pt++->relend) > extrascope ) extrascope = gl;
+ }
+// extrascope = 10; // approximation
+
+ }
+#endif
+ end += extrascope;
+
+ for( i=start; i<=end; i++ )
+ {
+ if( cpy[i] != NULL ) continue; // any non-NULL slot (including a (Gaplen *)1 marker value) is skipped
+
+ if( orig[i] == NULL )
+ {
+ if( cpy[i] ) free( cpy[i] ); // redundant, though (cpy[i] is NULL at this point)
+ cpy[i] = NULL;
+ continue;
+ }
+
+ for( l=0; (gl=orig[i][l].idatend)!=-1; l++ ) // count the records of orig[i]
+ ;
+
+ cpy[i] = realloc( cpy[i], (l+2) * sizeof( Gaplen ) ); // NOTE(review): the result is not checked for NULL
+// cpy[i] = calloc( sizeof( Gaplen ), l+2 );
+
+ for( l=0; (gl=orig[i][l].idatend)!=-1; l++ )
+ {
+#if 1
+ cpy[i][l] = orig[i][l]; // freq is copied as a pointer
+#else
+ cpy[i][l].len = gl;
+ cpy[i][l].relend = orig[i][l].relend;
+ cpy[i][l].freq = orig[i][l].freq;
+ cpy[i][l].gapidatend = orig[i][l].gapidatend;
+#endif
+
+// reporterr( "i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend );
+ }
+ cpy[i][l] = orig[i][l]; // does this work? (copies the terminating entry as well)
+// cpy[i][l].relend = -1;
+// cpy[i][l].len = -1;
+ }
+
+}
+#endif
+
+// Replace cpy[start..end] with fresh copies of the corresponding rows of orig.
+// A negative start is clamped to 0. If a slot of cpy holding the marker value
+// (Gaplen *)1 is met, end is pulled back to the slot just before it.
+// maxlen is not used by the function body.
+static void duplicategaplencompactx( Gaplen **cpy, Gaplen **orig, int maxlen, int start, int end )
+{
+	int pos, nrec;
+	Gaplen *src, *dst;
+
+	if( start < 0 ) start = 0;
+
+	// Pass 1: release whatever cpy currently holds in the range.
+	pos = start;
+	while( pos <= end )
+	{
+		if( cpy[pos] == (Gaplen *)1 )
+		{
+			end = pos - 1;
+			break;
+		}
+		if( cpy[pos] != NULL ) free( cpy[pos] );
+		cpy[pos] = NULL;
+		pos++;
+	}
+
+	// Pass 2: rebuild each row from orig.
+	for( pos=start; pos<=end; pos++ )
+	{
+		src = orig[pos];
+		if( src == NULL )
+		{
+			if( cpy[pos] != NULL ) free( cpy[pos] ); // redundant after pass 1, but harmless
+			cpy[pos] = NULL;
+			continue;
+		}
+
+		// room for npat records plus two spare entries
+		nrec = src[0].npat;
+		dst = realloc( cpy[pos], (nrec+2) * sizeof( Gaplen ) );
+		cpy[pos] = dst;
+
+		// copy every record, then the terminating entry (idatnext == -1) itself
+		while( src->idatnext != -1 )
+		{
+			*dst++ = *src++;
+		}
+		*dst = *src;
+	}
+
+	return;
+}
+
+
+
+#if USEGAPLENHALFORMTX
+static void duplicategaplenpartly( Gaplen **cpy, Gaplen **orig, int start, int end ) // Frees cpy[start..end], then rebuilds cpy[start .. end + extrascope] as copies of the matching rows of orig.
+{
+ int i, l, gl, extrascope;
+ Gaplen *pt;
+
+ if( start < 0 ) start = 0;
+ for( i=start; i<=end; i++ ) // first loop: release the current rows; stop at a (Gaplen *)1 marker and shrink end to the slot before it
+ {
+ if( cpy[i] == (Gaplen *)1 )
+ {
+ end = i-1;
+ break;
+// reporterr( "Okashii! i=%d\n", i );
+// exit( 1 );
+ }
+ if( cpy[i] ) free( cpy[i] );
+ cpy[i] = NULL;
+ }
+
+
+ extrascope = 0;
+#if 0
+ for( i=start; i<=end; i++ ) if( orig[i] )
+ {
+ for( pt=orig[i]; (pt->idatend)!=-1; )
+ {
+ if( (gl=pt++->relend) > extrascope ) extrascope = i+gl-end+1;
+ }
+// extrascope = 10; // approximation
+
+ }
+#else
+ if( orig[end] ) // extrascope = largest relend among the records of orig[end] (list ends at idatend == -1)
+ {
+ for( pt=orig[end]; (pt->idatend)!=-1; )
+ {
+ if( (gl=pt++->relend) > extrascope ) extrascope = gl;
+ }
+// extrascope = 10; // approximation
+
+ }
+#endif
+ end += extrascope;
+
+ for( i=start; i<=end; i++ ) // NOTE(review): rows past the pre-extension end were not visited by the first loop and are not tested against the (Gaplen *)1 marker here — confirm callers keep end + extrascope short of the marker slot
+ {
+ if( orig[i] == NULL )
+ {
+ if( cpy[i] ) free( cpy[i] ); // redundant, though
+ cpy[i] = NULL;
+ continue;
+ }
+
+ for( l=0; (gl=orig[i][l].idatend)!=-1; l++ ) // count the records of orig[i]
+ ;
+
+ cpy[i] = realloc( cpy[i], (l+2) * sizeof( Gaplen ) ); // NOTE(review): the result is not checked for NULL
+// cpy[i] = calloc( sizeof( Gaplen ), l+2 );
+
+ for( l=0; (gl=orig[i][l].idatend)!=-1; l++ )
+ {
+#if 1
+ cpy[i][l] = orig[i][l]; // freq is copied as a pointer
+#else
+ cpy[i][l].len = gl;
+ cpy[i][l].relend = orig[i][l].relend;
+ cpy[i][l].freq = orig[i][l].freq;
+ cpy[i][l].gapidatend = orig[i][l].gapidatend;
+#endif
+
+// reporterr( "i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend );
+ }
+ cpy[i][l] = orig[i][l]; // does this work? (copies the terminating entry as well)
+// cpy[i][l].relend = -1;
+// cpy[i][l].len = -1;
+ }
+
+}
+#endif
+
+
+// Apply an insertion of insertionlen to the records stored at cpy[gapstartpos].
+// For every record of that row (the list ends at the entry whose idatnext is -1):
+//   - newlen = record.len + insertionlen;
+//   - record.len is set to newlen when record.relend != 0;
+//   - the record of row gapstartpos-1 indexed by record.idatprev, if that row
+//     and link exist, also receives newlen.
+// Nothing happens when cpy[gapstartpos] is NULL.
+static void gaplenextendnoidatend( Gaplen **cpy, int gapstartpos, int insertionlen )
+{
+	Gaplen *row, *rec;
+	int k, prev, newlen;
+
+	row = cpy[gapstartpos];
+	if( row == NULL ) return;
+
+	for( k=0; row[k].idatnext != -1; k++ )
+	{
+		rec = row + k;
+		newlen = rec->len + insertionlen;
+
+		// only the record at gapstartpos itself is touched; no walk along idatnext
+		if( rec->relend != 0 ) rec->len = newlen;
+
+		prev = rec->idatprev;
+		if( gapstartpos != 0 && prev != -1 && cpy[gapstartpos-1] )
+		{
+			cpy[gapstartpos-1][prev].len = newlen;
+		}
+	}
+}
+
+
+
+
+
+#if USEGAPLENHALFORMTX
+// gaplenextend: adds insertionlen to the gap-length records reachable from cpy[gapstartpos], in the two passes below.
+static void gaplenextend( Gaplen **cpy, int gapstartpos, int insertionlen )
+{
+ int l, id, idn, pos, len;
+
+#if 1 // pass 1: for each record of row gapstartpos, lengthen record idatend of row gapstartpos + relend
+// reporterr( "inserting %d gaps at position %d\n", insertionlen, gapstartpos );
+ for( l=0; cpy[gapstartpos] && (id=cpy[gapstartpos][l].idatend) !=-1; l++ )
+ {
+ pos = cpy[gapstartpos][l].relend;
+ cpy[gapstartpos+pos][id].len += insertionlen; // NOTE(review): cpy[gapstartpos+pos] is not checked for NULL
+ }
+#endif
+
+#if 1 // pass 2: write len + insertionlen along the idatnext chain and into the linked record of the previous row
+ for( l=0; cpy[gapstartpos] && (id=cpy[gapstartpos][l].idatend) !=-1; l++ )
+ {
+ len = cpy[gapstartpos][l].len + insertionlen; // reads len after pass 1 may already have changed it (when relend == 0 and idatend == l) — presumably intended; verify
+// reporterr( "ext\n" );
+ for( pos=gapstartpos, idn=l; cpy[pos] != NULL && cpy[pos][idn].relend != 0; pos++ ) // follow idatnext row by row until a NULL row or a record with relend == 0
+ {
+// reporterr( "%d, plus %d %d->%d\n", pos, insertionlen, cpy[pos][idn].len, cpy[pos][idn].len+insertionlen );
+ cpy[pos][idn].len = len;
+ idn = cpy[pos][idn].idatnext;
+// if( pos == gapstartpos + 1 ) break;
+// break;
+ }
+// reporterr( "end\n" );
+
+ idn = cpy[gapstartpos][l].idatprev;
+ if( gapstartpos != 0 && idn != -1 && cpy[gapstartpos-1] ) cpy[gapstartpos-1][idn].len = len;
+ }
+#endif
+}
+#endif
+
+static void copygaplencompactx( Gaplen **cpy, Gaplen **orig, int seqlen, int gapstartpos, int insertionlen, int posincopy, int posinori )
+{
+	// Copy the len field of every record of orig[posinori] into the record at the
+	// same index of cpy[posincopy], then extend the gap lengths at gapstartpos
+	// (or at posincopy when gapstartpos is -1) by insertionlen.
+	// seqlen is not used. Nothing is copied or extended when orig[posinori] is NULL.
+	int k;
+	Gaplen *src, *dst;
+
+#if DEBUG
+	reporterr( "At the head of copygaplencompactx, cpy=\n" );
+	showgaplen( cpy+posincopy, 100 );
+	reporterr( "At the head of copygaplencompactx, orig=\n" );
+	showgaplen( orig+posinori, 100 );
+	reporterr( "posinori=%d\n", posinori );
+#endif
+
+	src = orig[posinori];
+	if( src == NULL ) return;
+	dst = cpy[posincopy];
+
+	// the terminating entry (idatnext == -1) is not copied
+	for( k=0; src[k].idatnext != -1; k++ ) dst[k].len = src[k].len;
+
+#if 0
+	for( l=0; (id=orig[posinori][l].idatend)!=-1; l++ )
+	{
+		pos = orig[posinori][l].relend;
+		if( pos == 0 ) continue;
+		if( orig[posinori+pos] == NULL )
+		{
+			reporterr( "Okashii\n" );
+			PFACERROR = 1;
+			continue;
+		}
+
+#if 0
+		for( k=0; orig[posinori+pos][k].relend==0; k++ ) // first half only
+		{
+			cpy[posincopy+pos][k].len = orig[posinori+pos][k].len; // does this work?
+		}
+#else
+		cpy[posincopy+pos][id].len = orig[posinori+pos][id].len; // does this work?
+#endif
+	}
+#endif
+
+	if( gapstartpos == -1 ) gapstartpos = posincopy;
+	gaplenextendnoidatend( cpy, gapstartpos, insertionlen );
+
+#if DEBUG
+	reporterr( "At the end of copygaplencompactx, cpy=\n" );
+	showgaplen( cpy+posincopy, 100 );
+#endif
+}
+
+
+#if USEGAPLENHALF
+static void copygaplenrestricted_zurasu( Gaplen **cpy, Gaplen **orig, int seqlen, int gapstartpos, int insertionlen, int startincopy, int endincopy, int startinori, int endinori ) // "zurasu" = shift: copies len values from orig rows startinori.. into cpy rows startincopy.., extends the gap at gapstartpos, then frees trailing cpy rows that hold no relend == 0 record.
+{
+ int i, extrascope, gl, j;
+ int zure, newend;
+ Gaplen *pt, *cpt;
+// int ncopied = 0;
+
+#if 0
+// I think it is also necessary to extend in the preceding direction with consecutive gaps.
+ for( i=startinori-1; 0<=i&&i<=seqlen; i-- )
+ {
+// reporterr( "i=%d\n", i );
+ if( orig[i] == NULL ) break;
+ for( pt=orig[i],cpt=cpy[i]; (gl=pt++->len)!=-1; ) cpt++->len = gl;
+ }
+#endif
+
+ zure = startincopy - startinori; // end is not checked; zure is the index offset between cpy and orig
+
+// int ncopied = 0;
+ if( orig[endinori] ) // newend = endinori + largest relend among the records of orig[endinori]
+ {
+ extrascope = 0;
+ for( pt=orig[endinori]; (pt->idatend)!=-1; )
+ {
+ if( (gl=pt++->relend) > extrascope ) extrascope = gl;
+ }
+// extrascope = 10; // approximation
+
+ newend = endinori + extrascope;
+ }
+ else newend = endinori;
+
+// reporterr( "ncopy = %d\n", newend - startinori );
+//
+#if 0 // not needed if the extra end is determined naturally
+ if( newend > seqlen ) newend = seqlen;
+// if( startinori < 0 ) startinori = 0;
+#endif
+
+ for( i=startinori, j=startincopy; i<=newend; i++, j++ ) // i indexes orig, j indexes cpy
+ {
+ if( orig[i] == NULL ) continue;
+
+// ncopied += 1;
+
+#if 0
+ for( pt=orig[i],cpt=cpy[i]; (gl=pt++->len)!=-1; )
+ cpt++->len = gl;
+#else
+ for( pt=orig[i],cpt=cpy[j]; pt->relend==0; ) // because the relend=0 entries are grouped in the first half. NOTE(review): cpy[j] is not checked for NULL
+// int k;
+// for( k=0; orig[i][k].relend==0; k++ ) // because the relend=0 entries are grouped in the first half.
+ {
+ cpt++->len = pt++->len;
+// reporterr( "i=%d, k=%d\n", i, k );
+// cpy[i][k].len = orig[i][k].len;
+ }
+#endif
+ }
+
+
+
+#if 0
+ for( i=0; i<=seqlen; i++ )
+ {
+ for( l=0; cpy[i]&&(gl=cpy[i][l].len)!=-1; l++ )
+ reporterr( "after copy, i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend );
+ }
+#endif
+
+ if( gapstartpos < 0 ) return; // no gap to extend: the clean-up loop below is skipped as well
+
+ gaplenextend( cpy, gapstartpos, insertionlen );
+
+
+
+// return;
+
+
+// TEST
+// for( i=endinori+1; i<=newend; i++ )
+ for( i=endincopy+1; i<=newend+zure; i++ ) // rows of cpy beyond endincopy, up to the shifted extended end
+ {
+ if( cpy[i] == NULL ) continue;
+ for( j=0; cpy[i][j].idatend!=-1; j++ ) // look for a record with relend == 0
+ {
+ if( cpy[i][j].relend == 0 )
+ {
+ break;
+ }
+ }
+ if( cpy[i][j].idatend == -1 ) // none found: the row is released
+ {
+ free( cpy[i] );
+ cpy[i] = NULL;
+ }
+ }
+
+
+
+
+
+
+#if 0
+ reporterr( "\n" );
+ for( i=0; i<=seqlen; i++ )
+ {
+ for( l=0; cpy[i]&&(gl=cpy[i][l].len)!=-1; l++ )
+ reporterr( "after add, i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend );
+ }
+#endif
+}
+#endif
+
+#if USEGAPLENHALFORMTX
+// Copy gap lengths from orig to cpy for rows scopestart..scopeend, where scopeend is
+// first pushed out by the largest relend found in orig[scopeend] (when that row exists).
+// Only the len of the leading records with relend == 0 is copied in each row.
+// When gapstartpos >= 0, gaplenextend() is then applied at gapstartpos with insertionlen.
+static void copygaplenrestricted( Gaplen **cpy, Gaplen **orig, int seqlen, int gapstartpos, int insertionlen, int scopestart, int scopeend )
+{
+	int i, extrascope, gl;
+	Gaplen *pt, *cpt;
+//	int ncopied = 0;
+
+#if 0
+// I think it is also necessary to extend in the preceding direction with consecutive gaps.
+	for( i=scopestart-1; 0<=i&&i<=seqlen; i-- )
+	{
+//		reporterr( "i=%d\n", i );
+		if( orig[i] == NULL ) break;
+		for( pt=orig[i],cpt=cpy[i]; (gl=pt++->len)!=-1; ) cpt++->len = gl;
+	}
+#endif
+
+//	int ncopied = 0;
+	if( orig[scopeend] )
+	{
+		extrascope = 0;
+		for( pt=orig[scopeend]; (pt->idatend)!=-1; )
+		{
+			if( (gl=pt++->relend) > extrascope ) extrascope = gl;
+		}
+//		extrascope = 10; // approximation
+
+		scopeend += extrascope;
+	}
+
+//	reporterr( "ncopy = %d\n", scopeend - scopestart );
+//
+#if 0 // not needed if the extra end is determined naturally
+	if( scopeend > seqlen ) scopeend = seqlen;
+//	if( scopestart < 0 ) scopestart = 0;
+#endif
+
+	if( scopestart < 0 ) scopestart = 0;
+	for( i=scopestart; i<=scopeend; i++ )
+	{
+		if( orig[i] == NULL ) continue;
+
+//		ncopied += 1;
+
+#if 0
+		for( pt=orig[i],cpt=cpy[i]; (gl=pt++->len)!=-1; )
+			cpt++->len = gl;
+#else
+		for( pt=orig[i],cpt=cpy[i]; pt->relend==0; ) // because the relend=0 entries are grouped in the first half.
+//		int k;
+//		for( k=0; orig[i][k].relend==0; k++ ) // because the relend=0 entries are grouped in the first half.
+		{
+			cpt++->len = pt++->len;
+//			reporterr( "i=%d, k=%d\n", i, k );
+//			cpy[i][k].len = orig[i][k].len;
+		}
+#endif
+	}
+
+#if 0
+	for( i=0; i<=seqlen; i++ )
+	{
+		for( l=0; cpy[i]&&(gl=cpy[i][l].len)!=-1; l++ )
+			reporterr( "after copy, i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend );
+	}
+#endif
+
+	if( gapstartpos < 0 ) return;
+
+	gaplenextend( cpy, gapstartpos, insertionlen );
+
+	return;
+
+#if 0 // This clean-up loop sat after the unconditional return above and could never run; kept for reference.
+// To re-enable: declare int j, endinori, newend; set endinori = scopeend before scopeend is extended and newend = scopeend after.
+// TEST: is only the end used in the extra scope?
+	for( i=endinori+1; i<=newend; i++ )
+	{
+		if( cpy[i] == NULL ) continue;
+		for( j=0; cpy[i][j].idatend!=-1; j++ )
+		{
+			if( cpy[i][j].relend == 0 ) break;
+		}
+		if( cpy[i][j].idatend == -1 )
+		{
+			free( cpy[i] );
+			cpy[i] = NULL;
+		}
+	}
+#endif
+#if 0
+	reporterr( "\n" );
+	for( i=0; i<=seqlen; i++ )
+	{
+		for( l=0; cpy[i]&&(gl=cpy[i][l].len)!=-1; l++ )
+			reporterr( "after add, i=%d, l=%d, len=%d, freq=%f, relend=%d\n", i, l, cpy[i][l].len, cpy[i][l].freq, cpy[i][l].relend );
+	}
+#endif
+}
+#endif
+
+// Free the rows mtx[startpos..endpos] and reset each slot to NULL.
+// A negative startpos is clamped to 0. The walk ends early at the first slot
+// holding the marker value (Gaplen *)1, which is left untouched.
+static void freegaplenpartly( Gaplen **mtx, int startpos, int endpos )
+{
+	int pos;
+	Gaplen *row;
+
+	pos = ( startpos < 0 ) ? 0 : startpos;
+	while( pos <= endpos )
+	{
+		row = mtx[pos];
+
+		// marker slot reached: nothing at or beyond it is released
+		if( row == (Gaplen *)1 ) return;
+
+		if( row != NULL )
+		{
+			free( row );
+		}
+		mtx[pos] = NULL;
+
+		pos++;
+	}
+}
+
+
+
+
+
+double D__align( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
+/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
+{
+
+// int k;
+ register int i, j;
+
+
+
+
+ int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
+ int lgth1, lgth2;
+ int resultlen;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+// double fpenalty = (double)penalty;
+#if USE_PENALTY_EX
+ double fpenalty_ex = (double)penalty_ex;
+#endif
+#if 1
+ double *wtmp;
+ int *ijppt;
+ double *mjpt, *prept, *curpt;
+ int *mpjpt;
+#endif
+ static TLS double mi, *m;
+ static TLS int **ijp;
+ static TLS int mpi, *mp;
+ static TLS double *w1, *w2;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
+ static TLS char **mseq1;
+ static TLS char **mseq2;
+ static TLS char **mseq;
+ static TLS double **cpmx1;
+ static TLS double **cpmx2;
+ static TLS int **intwork;
+ static TLS double **doublework;
+ static TLS int orlgth1 = 0, orlgth2 = 0;
+#if USEGAPLENHALF
+ Gaplen ****gaplen1half = NULL; // NULL ga iru to omou.
+ Gaplen ****gaplen2half = NULL; // NULL ga iru to omou.
+#endif
+#if USEGAPLENMTX
+ Gaplen ****gaplen1mtx = NULL; // NULL ga iru to omou.
+ Gaplen ****gaplen2mtx = NULL; // NULL ga iru to omou.
+#endif
+ static TLS Gaplen **gaplen1 = NULL; // NULL ga iru to omou.
+ static TLS Gaplen **gaplen2 = NULL; // NULL ga iru to omou.
+ static TLS Gaplen ***gaplen1jprev = NULL;
+ static TLS Gaplen ***gaplen2jprev = NULL;
+ static TLS Gaplen ***gaplen1jcurr = NULL;
+ static TLS Gaplen ***gaplen2jcurr = NULL;
+ static TLS Gaplen ***gaplen1icurr = NULL;
+ static TLS Gaplen ***gaplen2icurr = NULL;
+ static TLS Gaplen ***gaplen1jbestkamo = NULL;
+ static TLS Gaplen ***gaplen2jbestkamo = NULL;
+ static TLS Gaplen ***gaplen1ibestkamo = NULL;
+ static TLS Gaplen ***gaplen2ibestkamo = NULL;
+ static TLS Gaplen ***gaplen1jbest = NULL;
+ static TLS Gaplen ***gaplen2jbest = NULL;
+ double fpenalty = (double)penalty;
+ double fpenalty_shift = (double)penalty_shift;
+ static TLS Gaplen ****gaplens = NULL;
+
+ Gaplen ***gaplentmp = NULL;
+ int *warpis = NULL;
+ int *warpjs = NULL;
+ int *warpi = NULL;
+ int *warpj = NULL;
+ int *prevwarpi = NULL;
+ int *prevwarpj = NULL;
+ double *wmrecords = NULL;
+ double *prevwmrecords = NULL;
+ int warpn = 0;
+ int warpbase;
+ double curm = 0.0;
+ double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+ int *warpipt, *warpjpt;
+ int k;
+ double pfac, pfactmp;
+ int newgaplen;
+
+// for( i=0; i<icyc; i++ ) fprintf( stderr, "%s, %f\n", seq1[i], eff1[i] );
+// for( i=0; i<jcyc; i++ ) fprintf( stderr, "%s, %f\n", seq2[i], eff2[i] );
+
+// reporterr( "\nsizeof(gaplen) = %d\n", sizeof( Gaplen ) );
+// reporterr( "\nsizeof(int) = %d\n", sizeof( int ) );
+// reporterr( "\nsizeof(double) = %d\n", sizeof( double ) );
+// reporterr( "\nsizeof(double*) = %d\n", sizeof( double * ) );
+
+
+ if( seq1 == NULL )
+ {
+ if( orlgth1 )
+ {
+// fprintf( stderr, "## Freeing local arrays in D__align\n" );
+ orlgth1 = 0;
+ orlgth2 = 0;
+
+ imp_match_init_strictD( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL );
+
+ free( mseq1 );
+ free( mseq2 );
+ FreeFloatVec( w1 );
+ FreeFloatVec( w2 );
+ FreeFloatVec( match );
+ FreeFloatVec( initverticalw );
+ FreeFloatVec( lastverticalw );
+
+ FreeFloatVec( m );
+ FreeIntVec( mp );
+
+ FreeCharMtx( mseq );
+
+ FreeFloatMtx( cpmx1 );
+ FreeFloatMtx( cpmx2 );
+
+ FreeFloatMtx( doublework );
+ FreeIntMtx( intwork );
+
+
+
+ free( gaplens );
+ if( gaplen1ibestkamo ) FreeGaplenCub( gaplen1ibestkamo ); gaplen1ibestkamo = NULL;
+ if( gaplen2ibestkamo ) FreeGaplenCub( gaplen2ibestkamo ); gaplen2ibestkamo = NULL;
+ if( gaplen1icurr ) FreeGaplenCub( gaplen1icurr ); gaplen1icurr = NULL;
+ if( gaplen2icurr ) FreeGaplenCub( gaplen2icurr ); gaplen2icurr = NULL;
+
+ if( gaplen1jprev ) FreeGaplenCub( gaplen1jprev ); gaplen1jprev = NULL;
+ if( gaplen2jprev ) FreeGaplenCub( gaplen2jprev ); gaplen2jprev = NULL;
+ if( gaplen1jcurr ) FreeGaplenCub( gaplen1jcurr ); gaplen1jcurr = NULL;
+ if( gaplen2jcurr ) FreeGaplenCub( gaplen2jcurr ); gaplen2jcurr = NULL;
+ if( gaplen1jbestkamo ) FreeGaplenCub( gaplen1jbestkamo ); gaplen1jbestkamo = NULL;
+ if( gaplen2jbestkamo ) FreeGaplenCub( gaplen2jbestkamo ); gaplen2jbestkamo = NULL;
+ if( gaplen1jbest ) FreeGaplenCub( gaplen1jbest ); gaplen1jbest = NULL;
+ if( gaplen2jbest ) FreeGaplenCub( gaplen2jbest ); gaplen2jbest = NULL;
+ if( gaplen1 ) FreeGaplenMtx( gaplen1, 1 ); gaplen1 = NULL;
+ if( gaplen2 ) FreeGaplenMtx( gaplen2, 1 ); gaplen2 = NULL;
+ }
+ else
+ {
+// fprintf( stderr, "## Not allocated\n" );
+ }
+ return( 0.0 );
+ }
+
+
+ lgth1 = strlen( seq1[0] );
+ lgth2 = strlen( seq2[0] );
+
+
+ reporterr( "%d x %d sequences, len=%d, %d\n", icyc, jcyc, lgth1, lgth2 );
+
+
+#if 0
+ if( lgth1 == 0 || lgth2 == 0 )
+ {
+ fprintf( stderr, "WARNING (Aalignmm): lgth1=%d, lgth2=%d\n", lgth1, lgth2 );
+ }
+#endif
+ if( lgth1 == 0 && lgth2 == 0 )
+ return( 0.0 );
+
+ if( lgth1 == 0 )
+ {
+ for( i=0; i<icyc; i++ )
+ {
+ j = lgth2;
+ seq1[i][j] = 0;
+ while( j ) seq1[i][--j] = *newgapstr;
+// fprintf( stderr, "seq1[i] = %s\n", seq1[i] );
+ }
+ return( 0.0 );
+ }
+
+ if( lgth2 == 0 )
+ {
+ for( i=0; i<jcyc; i++ )
+ {
+ j = lgth1;
+ seq2[i][j] = 0;
+ while( j ) seq2[i][--j] = *newgapstr;
+// fprintf( stderr, "seq2[i] = %s\n", seq2[i] );
+ }
+ return( 0.0 );
+ }
+
+ warpbase = lgth1 + lgth2;
+ warpis = NULL;
+ warpjs = NULL;
+ warpn = 0;
+
+
+
+ if( trywarp )
+ {
+// reporterr( "Not supported yet!\n" );
+// exit( 1 );
+// fprintf( stderr, "IN D__align, penalty_shift = %d\n", penalty_shift );
+ if( headgp == 0 || tailgp == 0 )
+ {
+ fprintf( stderr, "At present, headgp and tailgp must be 1 to allow shift.\n" );
+ exit( 1 );
+ }
+ wmrecords = AllocateFloatVec( lgth2+1 );
+ warpi = AllocateIntVec( lgth2+1 );
+ warpj = AllocateIntVec( lgth2+1 );
+ prevwmrecords = AllocateFloatVec( lgth2+1 );
+ prevwarpi = AllocateIntVec( lgth2+1 );
+ prevwarpj = AllocateIntVec( lgth2+1 );
+ for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
+ }
+
+
+#if 0
+ fprintf( stderr, "#### eff in SA+++align\n" );
+ fprintf( stderr, "#### seq1[0] = %s\n", seq1[0] );
+ fprintf( stderr, "#### strlen( seq1[0] ) = %d\n", strlen( seq1[0] ) );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, "eff1[%d] = %f\n", i, eff1[i] );
+ fprintf( stderr, "#### seq2[0] = %s\n", seq2[0] );
+ fprintf( stderr, "#### strlen( seq2[0] ) = %d\n", strlen( seq2[0] ) );
+ for( i=0; i<jcyc; i++ ) fprintf( stderr, "eff2[%d] = %f\n", i, eff2[i] );
+#endif
+ if( orlgth1 == 0 )
+ {
+ mseq1 = AllocateCharMtx( njob, 0 );
+ mseq2 = AllocateCharMtx( njob, 0 );
+ }
+
+ if( lgth1 > orlgth1 || lgth2 > orlgth2 )
+ {
+ int ll1, ll2;
+
+
+ if( orlgth1 > 0 && orlgth2 > 0 )
+ {
+ FreeFloatVec( w1 );
+ FreeFloatVec( w2 );
+ FreeFloatVec( match );
+ FreeFloatVec( initverticalw );
+ FreeFloatVec( lastverticalw );
+
+ FreeFloatVec( m );
+ FreeIntVec( mp );
+
+ FreeCharMtx( mseq );
+
+ FreeFloatMtx( cpmx1 );
+ FreeFloatMtx( cpmx2 );
+
+ FreeFloatMtx( doublework );
+ FreeIntMtx( intwork );
+
+
+ free( gaplens );
+
+ if( gaplen1ibestkamo ) FreeGaplenCub( gaplen1ibestkamo ); gaplen1ibestkamo = NULL;
+ if( gaplen2ibestkamo ) FreeGaplenCub( gaplen2ibestkamo ); gaplen2ibestkamo = NULL;
+ if( gaplen1icurr ) FreeGaplenCub( gaplen1icurr ); gaplen1icurr = NULL;
+ if( gaplen2icurr ) FreeGaplenCub( gaplen2icurr ); gaplen2icurr = NULL;
+
+ if( gaplen1jcurr ) FreeGaplenCub( gaplen1jcurr ); gaplen1jcurr = NULL;
+ if( gaplen1jprev ) FreeGaplenCub( gaplen1jprev ); gaplen1jprev = NULL;
+ if( gaplen2jcurr ) FreeGaplenCub( gaplen2jcurr ); gaplen2jcurr = NULL;
+ if( gaplen2jprev ) FreeGaplenCub( gaplen2jprev ); gaplen2jprev = NULL;
+ if( gaplen1jbestkamo ) FreeGaplenCub( gaplen1jbestkamo ); gaplen1jbestkamo = NULL;
+ if( gaplen2jbestkamo ) FreeGaplenCub( gaplen2jbestkamo ); gaplen2jbestkamo = NULL;
+ if( gaplen1jbest ) FreeGaplenCub( gaplen1jbest ); gaplen1jbest = NULL;
+ if( gaplen2jbest ) FreeGaplenCub( gaplen2jbest ); gaplen2jbest = NULL;
+ if( gaplen1 ) FreeGaplenMtx( gaplen1, 1 ); gaplen1 = NULL;
+ if( gaplen2 ) FreeGaplenMtx( gaplen2, 1 ); gaplen2 = NULL;
+
+
+ }
+
+ ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
+ ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
+
+#if DEBUG
+ fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
+#endif
+
+ w1 = AllocateFloatVec( ll2+2 );
+ w2 = AllocateFloatVec( ll2+2 );
+ match = AllocateFloatVec( ll2+2 );
+
+ initverticalw = AllocateFloatVec( ll1+2 );
+ lastverticalw = AllocateFloatVec( ll1+2 );
+
+ m = AllocateFloatVec( ll2+2 );
+ mp = AllocateIntVec( ll2+2 );
+
+ mseq = AllocateCharMtx( njob, ll1+ll2 );
+
+ cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
+ cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
+
+#if FASTMATCHCALC
+ doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 );
+#else
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
+#endif
+
+#if DEBUG
+ fprintf( stderr, "succeeded\n" );
+#endif
+
+ orlgth1 = ll1 - 100;
+ orlgth2 = ll2 - 100;
+
+
+// reporterr( "Allocating gaplen1 and gaplen2\n" );
+ gaplen1 = (Gaplen ** )calloc( ll1+2, sizeof( Gaplen * ) );
+ gaplen1[ll1+1] = (Gaplen *)1;
+ gaplen2 = (Gaplen ** )calloc( ll2+2, sizeof( Gaplen * ) );
+ gaplen2[ll2+1] = (Gaplen *)1;
+
+
+// reporterr( "Allocating gaplen*\n" );
+ gaplen1ibestkamo = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) );
+ gaplen2ibestkamo = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) );
+ gaplen1icurr = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) );
+ gaplen2icurr = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) );
+ gaplen1jbestkamo = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen2jbestkamo = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen1jbest = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen2jbest = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen1jcurr = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen2jcurr = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen1jprev = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen2jprev = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+
+ gaplens = calloc( sizeof( Gaplen ***), 12 );
+ gaplens[0] = gaplen1ibestkamo;
+ gaplens[1] = gaplen2ibestkamo;
+ gaplens[2] = gaplen1icurr;
+ gaplens[3] = gaplen2icurr;
+ gaplens[4] = gaplen1jbestkamo;
+ gaplens[5] = gaplen2jbestkamo;
+ gaplens[6] = gaplen1jbest;
+ gaplens[7] = gaplen2jbest;
+ gaplens[8] = gaplen1jcurr;
+ gaplens[9] = gaplen2jcurr;
+ gaplens[10] = gaplen1jprev;
+ gaplens[11] = gaplen2jprev;
+// reporterr( "Allocation end\n" );
+ }
+
+ {
+ int ll1 = lgth1;
+ int ll2 = lgth2;
+
+// reporterr( "Allocating gaplen*i\n" );
+ for(i=0; i<ll1+1; i++ )
+ {
+ gaplen1ibestkamo[i] = (Gaplen **)calloc( ll1+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1-i; k++ ) gaplen1ibestkamo[i][k] = NULL;
+ gaplen1ibestkamo[i][k] = (Gaplen *)1;
+
+ gaplen2ibestkamo[i] = (Gaplen **)calloc( ll2+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1; k++ ) gaplen2ibestkamo[i][k] = NULL;
+ gaplen2ibestkamo[i][k] = (Gaplen *)1;
+
+ gaplen1icurr[i] = (Gaplen **)calloc( ll1+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1-i; k++ ) gaplen1icurr[i][k] = NULL;
+ gaplen1icurr[i][k] = (Gaplen *)1;
+
+ gaplen2icurr[i] = (Gaplen **)calloc( ll2+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1; k++ ) gaplen2icurr[i][k] = NULL;
+ gaplen2icurr[i][k] = (Gaplen *)1;
+ }
+ gaplen1ibestkamo[ll1+1] = NULL;
+ gaplen2ibestkamo[ll1+1] = NULL;
+ gaplen1icurr[ll1+1] = NULL;
+ gaplen2icurr[ll1+1] = NULL;
+
+// reporterr( "Allocating gaplen*j\n" );
+ for(i=0; i<ll2+1; i++ )
+ {
+ gaplen1jbestkamo[i] = (Gaplen **)calloc( ll1+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1; k++ ) gaplen1jbestkamo[i][k] = NULL;
+ gaplen1jbestkamo[i][k] = (Gaplen *)1;
+
+ gaplen2jbestkamo[i] = (Gaplen **)calloc( ll2+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1-i; k++ ) gaplen2jbestkamo[i][k] = NULL;
+ gaplen2jbestkamo[i][k] = (Gaplen *)1;
+
+ gaplen1jbest[i] = (Gaplen **)calloc( ll1+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1; k++ ) gaplen1jbest[i][k] = NULL;
+ gaplen1jbest[i][k] = (Gaplen *)1;
+
+ gaplen2jbest[i] = (Gaplen **)calloc( ll2+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1-i; k++ ) gaplen2jbest[i][k] = NULL;
+ gaplen2jbest[i][k] = (Gaplen *)1;
+
+ gaplen1jcurr[i] = (Gaplen **)calloc( ll1+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1; k++ ) gaplen1jcurr[i][k] = NULL;
+ gaplen1jcurr[i][k] = (Gaplen *)1;
+
+ gaplen2jcurr[i] = (Gaplen **)calloc( ll2+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1-i; k++ ) gaplen2jcurr[i][k] = NULL;
+ gaplen2jcurr[i][k] = (Gaplen *)1;
+
+ gaplen1jprev[i] = (Gaplen **)calloc( ll1+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1; k++ ) gaplen1jprev[i][k] = NULL;
+ gaplen1jprev[i][k] = (Gaplen *)1;
+
+ gaplen2jprev[i] = (Gaplen **)calloc( ll2+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1-i; k++ ) gaplen2jprev[i][k] = NULL;
+ gaplen2jprev[i][k] = (Gaplen *)1;
+
+ }
+ gaplen1jbestkamo[ll2+1] = NULL;
+ gaplen2jbestkamo[ll2+1] = NULL;
+ gaplen1jbest[ll2+1] = NULL;
+ gaplen2jbest[ll2+1] = NULL;
+ gaplen1jcurr[ll2+1] = NULL;
+ gaplen2jcurr[ll2+1] = NULL;
+ gaplen1jprev[ll2+1] = NULL;
+ gaplen2jprev[ll2+1] = NULL;
+ }
+
+
+#if USEGAPLENMTX
+/* allocated on every call */
+
+ reporterr( "Allocating gaplenmtx1\n" );
+ gaplen1mtx = (Gaplen ****)calloc( (lgth1+2), sizeof( Gaplen ***) );
+ for(i=0; i<lgth1+1; i++ ) gaplen1mtx[i] = (Gaplen ***)calloc( lgth2+2, sizeof( Gaplen ** ) );
+ for(i=0; i<lgth1+1; i++ )
+ {
+ for(j=0; j<lgth2+1; j++ )
+ {
+ gaplen1mtx[i][j] = (Gaplen **)calloc( lgth1+2, sizeof( Gaplen * ) );
+ for( k=0; k<lgth1+1; k++ ) gaplen1mtx[i][j][k] = NULL;
+ gaplen1mtx[i][j][k] = (Gaplen *)1;
+ }
+ gaplen1mtx[i][j] = NULL;
+ }
+ gaplen1mtx[i] = NULL;
+
+ reporterr( "Allocating gaplenmtx2\n" );
+ gaplen2mtx = (Gaplen ****)calloc( (lgth1+2), sizeof( Gaplen ***) );
+ for(i=0; i<lgth1+1; i++ ) gaplen2mtx[i] = (Gaplen ***)calloc( lgth2+2, sizeof( Gaplen ** ) );
+ for(i=0; i<lgth1+1; i++ )
+ {
+ for(j=0; j<lgth2+1; j++ )
+ {
+ gaplen2mtx[i][j] = (Gaplen **)calloc( lgth2+2, sizeof( Gaplen * ) );
+ for( k=0; k<lgth2+1; k++ ) gaplen2mtx[i][j][k] = NULL;
+ gaplen2mtx[i][j][k] = (Gaplen *)1;
+ }
+ gaplen2mtx[i][j] = NULL;
+ }
+ gaplen2mtx[i] = NULL;
+
+#endif
+
+#if USEGAPLENHALF
+ reporterr( "Allocating gaplenhalf1\n" );
+ gaplen1half = (Gaplen ****)calloc( (lgth1+2), sizeof( Gaplen ***) );
+ for(i=0; i<lgth1+1; i++ ) gaplen1half[i] = (Gaplen ***)calloc( lgth2+2, sizeof( Gaplen ** ) );
+ for(i=0; i<lgth1+1; i++ )
+ {
+ for(j=0; j<lgth2+1; j++ )
+ {
+ gaplen1half[i][j] = (Gaplen **)calloc( lgth1+2 - i, sizeof( Gaplen * ) );
+ for( k=0; k<lgth1+1-i; k++ ) gaplen1half[i][j][k] = NULL;
+ gaplen1half[i][j][k] = (Gaplen *)1;
+ }
+ gaplen1half[i][j] = NULL;
+ }
+ gaplen1half[i] = NULL;
+
+ reporterr( "Allocating gaplenhalf2\n" );
+ gaplen2half = (Gaplen ****)calloc( (lgth1+2), sizeof( Gaplen ***) );
+ for(i=0; i<lgth1+1; i++ ) gaplen2half[i] = (Gaplen ***)calloc( lgth2+2, sizeof( Gaplen ** ) );
+ for(i=0; i<lgth1+1; i++ )
+ {
+ for(j=0; j<lgth2+1; j++ )
+ {
+ gaplen2half[i][j] = (Gaplen **)calloc( lgth2+2 - j, sizeof( Gaplen * ) );
+ for( k=0; k<lgth2+1-j; k++ ) gaplen2half[i][j][k] = NULL;
+ gaplen2half[i][j][k] = (Gaplen *)1;
+ }
+ gaplen2half[i][j] = NULL;
+ }
+ gaplen2half[i] = NULL;
+#endif
+
+
+/* allocated on every call */
+
+
+ for( i=0; i<icyc; i++ )
+ {
+ mseq1[i] = mseq[i];
+ seq1[i][lgth1] = 0;
+ }
+ for( j=0; j<jcyc; j++ )
+ {
+ mseq2[j] = mseq[icyc+j];
+ seq2[j][lgth2] = 0;
+ }
+
+
+ if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
+ {
+ int ll1, ll2;
+
+ if( commonAlloc1 && commonAlloc2 )
+ {
+ FreeIntMtx( commonIP );
+ }
+
+ ll1 = MAX( orlgth1, commonAlloc1 );
+ ll2 = MAX( orlgth2, commonAlloc2 );
+
+#if DEBUG
+ fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
+#endif
+
+ commonIP = AllocateIntMtx( ll1+10, ll2+10 );
+
+#if DEBUG
+ fprintf( stderr, "succeeded\n\n" );
+#endif
+
+ commonAlloc1 = ll1;
+ commonAlloc2 = ll2;
+ }
+ ijp = commonIP;
+
+#if 0
+ {
+ double t = 0.0;
+ for( i=0; i<icyc; i++ )
+ t += eff1[i];
+ fprintf( stderr, "## totaleff = %f\n", t );
+ }
+#endif
+
+ cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc );
+ cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc );
+
+
+// reporterr( "Counting gaplen\n" );
+ gaplencount( icyc, lgth1, gaplen1, seq1, eff1 );
+ gaplencount( jcyc, lgth2, gaplen2, seq2, eff2 );
+#if DEBUG
+ reporterr( "group1 = \n" );
+ showgaplen( gaplen1, lgth1 );
+ reporterr( "group2 = \n" );
+ showgaplen( gaplen2, lgth2 );
+#endif
+// reporterr( "done.\n" );
+
+
+ for( i=0; i<lgth1+1; i++ ) for( j=0; j<lgth2+1; j++ )
+ {
+#if USEGAPLENMTX
+// duplicategaplen( gaplen1mtx[i][j], gaplen1, lgth1 );
+// duplicategaplen( gaplen2mtx[i][j], gaplen2, lgth2 );
+
+// duplicategaplenpartly( gaplen2mtx[i][j], gaplen2, j-0, lgth2 ); // anzen
+// duplicategaplenpartly( gaplen1mtx[i][j], gaplen1, i-0, lgth1 ); // anzen
+ duplicategaplenpartly( gaplen1mtx[i][j], gaplen1, i-0, i ); // iranaikamo
+ duplicategaplenpartly( gaplen2mtx[i][j], gaplen2, j-0, j ); // iranaikamo
+#endif
+#if USEGAPLENHALF
+
+// duplicategaplenpartly( gaplen1half[i][j], gaplen1+i, 0, lgth1-i ); // KOKO de setsuyaku dekiru to omou.
+// duplicategaplenpartly( gaplen2half[i][j], gaplen2+j, 0, lgth2-j ); // originally, j-1, lgth2
+ duplicategaplenpartly( gaplen1half[i][j], gaplen1+i, 0, 0 ); // test
+ duplicategaplenpartly( gaplen2half[i][j], gaplen2+j, 0, 0 ); // test
+#endif
+ }
+
+
+#if USEGAPLENMTX
+ reporterr( "Duplicating gaplen*mtx\n" );
+ for( i=0; i<lgth1+1; i++ )
+ {
+// addnewgaplen( gaplen1mtx[i][0], gaplen1, gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2mtx[i][0], gaplen2, gaplen2, lgth2, 0, i );
+// duplicategaplenpartly( gaplen1mtx[i][0], gaplen1, 0, lgth1 );
+// duplicategaplenpartly( gaplen2mtx[i][0], gaplen2, 0, lgth2 );
+ copygaplenrestricted( gaplen2mtx[i][0], gaplen2, lgth2, 0, i, 0, 0 );
+ }
+#endif
+#if USEGAPLENHALF
+ reporterr( "Duplicating gaplen*mtx\n" );
+ for( i=0; i<lgth1+1; i++ )
+ {
+ copygaplenrestricted( gaplen2half[i][0], gaplen2, lgth2, 0, i, 0, 0 );
+ }
+#endif
+
+
+
+ for( i=0; i<1; i++ )
+ {
+// duplicategaplencompactx( gaplen1icurr[i], gaplen1, lgth1, i-0, lgth1 ); //originally, 0, lgth1
+//
+// duplicategaplencompactx( gaplen1icurr[i], gaplen1+i, lgth1-i, 0, lgth1-i ); // half
+ duplicategaplencompactx( gaplen1icurr[i], gaplen1+i, lgth1-i, 0, 1 ); // 0, 1 hitsuyou
+
+
+// duplicategaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, lgth2 ); // ichiou zenbu
+ duplicategaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, 0 );
+
+
+ copygaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error?
+
+
+// duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1, lgth1, 0, 1 );
+// duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 1 ); // half
+ duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 0 ); // half
+// duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, lgth2 );
+ duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, 0 );
+// copygaplenrestricted( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error?
+// copygaplenrestricted_zurasu( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, lgth2, 0, lgth2 ); // -> zurasu -> error?
+ copygaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error?
+ }
+
+// reporterr( "Duplicating gaplen*j*curr \n" );
+// int nduplicated = 0;
+ for( j=0; j<lgth2+1; j++ )
+// for( j=0; j<1; j++ )
+ {
+#if USEGAPLENMTX
+// addnewgaplen( gaplen1mtx[0][j], gaplen1, gaplen1, lgth1, 0, j );
+// addnewgaplen( gaplen2mtx[0][j], gaplen2, gaplen2, lgth2, -1, 0 );
+// duplicategaplenpartly( gaplen1mtx[0][j], gaplen1, 0, lgth1 );
+// duplicategaplenpartly( gaplen2mtx[0][j], gaplen2, 0, lgth2 );
+ copygaplenrestricted( gaplen1mtx[0][j], gaplen1, lgth1, 0, j, 0, 0 );
+#endif
+
+#if USEGAPLENHALF
+ copygaplenrestricted( gaplen1half[0][j], gaplen1, lgth1, 0, j, 0, 0 );
+#endif
+// reporterr( "1jcurr?\n" );
+// duplicategaplencompactx( gaplen1jcurr[j], gaplen1, lgth1, 0, lgth1 ); // test
+ duplicategaplencompactx( gaplen1jcurr[j], gaplen1, lgth1, 0, 0 ); // dame?
+// reporterr( "done\n" );
+// duplicategaplencompactx( gaplen1jcurr[j], gaplen1, lgth1, 0, 0 ); //test
+
+// duplicategaplencompactx( gaplen2jcurr[j], gaplen2, lgth2, j-0, lgth2 ); // full
+// duplicategaplencompactx( gaplen2jcurr[j], gaplen2+j, lgth2-j, 0, lgth2-j ); //half! KOKO????
+//reporterr( "starting suspicious duplication\n" );
+ duplicategaplencompactx( gaplen2jcurr[j], gaplen2+j, lgth2-j, 0, 0 ); //half!
+//reporterr( "starting suspicious copy\n" );
+ copygaplencompactx( gaplen1jcurr[j], gaplen1, lgth1, 0, j, 0, 0 ); // TEST
+//reporterr( "finished\n" );
+
+// reporterr( "Checking gaplen1jcurr[%d]\n", j );
+// checkgaplen( gaplen1jcurr[j], 100 );
+// reporterr( "Checking gaplen2jcurr[%d]\n", j );
+// checkgaplen( gaplen2jcurr[j], 100 );
+ }
+
+// reporterr( "nduplicated (corrected) = %d\n", nduplicated );
+
+// reporterr( "Duplicating gaplen*j*prev \n\n" );
+ for( j=0; j<lgth2+1; j++ ) // allocate nominotame, atode uwagaki
+ {
+// duplicategaplencompactx( gaplen1jprev[j], gaplen1, lgth1, 0, lgth1 );
+ duplicategaplencompactx( gaplen1jprev[j], gaplen1, lgth1, 0, 0 ); // TEST
+// duplicategaplencompactx( gaplen2jprev[j], gaplen2, lgth2, j-0, lgth2 ); // originally, 0,lgth2
+// duplicategaplencompactx( gaplen2jprev[j], gaplen2+j, lgth2-j, 0, lgth2-j ); // half
+ duplicategaplencompactx( gaplen2jprev[j], gaplen2+j, lgth2-j, 0, 0 ); // half
+
+
+ copygaplencompactx( gaplen1jprev[j], gaplen1, lgth1, 0, j, 0, 0 ); // wasuretetakamo
+
+
+ }
+
+
+// reporterr( "Duplicating gaplen*j*best \n\n" );
+
+ for( j=0; j<lgth2+1; j++ )
+// for( j=0; j<1; j++ )
+ {
+// duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, lgth1 ); // KOKO
+// duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, 0 ); // test
+// duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, 1 );
+ duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, 0 );
+
+
+// duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, 1 );
+// duplicategaplencompactx( gaplen2jbestkamo[j], gaplen2, lgth2, j-0, j+1 ); // originally, 0, j+1
+ duplicategaplencompactx( gaplen2jbestkamo[j], gaplen2+j, lgth2-j, 0, 1 ); // half!
+ copygaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, j, 0, 0 ); // TEST
+
+// duplicategaplencompactx( gaplen1jbest[j], gaplen1, lgth1, 0, lgth1 ); // KOKO
+ duplicategaplencompactx( gaplen1jbest[j], gaplen1, lgth1, 0, 0 ); // test
+// duplicategaplencompactx( gaplen2jbest[j], gaplen2, lgth2,j-0, j+1 ); // originally, 0,j+1
+ duplicategaplencompactx( gaplen2jbest[j], gaplen2+j, lgth2-j, 0, 1 ); // half!
+ copygaplencompactx( gaplen1jbest[j], gaplen1, lgth1, 0, j, 0, 0 ); // TEST
+
+
+ }
+
+// reporterr( "Duplication end\n" );
+
+
+#if 0
+ reporterr( "Checking gaplen1icurr\n" );
+ checkgaplen( gaplen1icurr[0], 100 );
+ reporterr( "Checking gaplen2icurr\n" );
+ checkgaplen( gaplen2icurr[0], 100 );
+#endif
+
+
+
+
+// showgaplen( gaplen1jcurr[50], lgth2 );
+
+ currentw = w1;
+ previousw = w2;
+
+ match_calc( n_dynamicmtx, initverticalw, cpmx2, cpmx1, 0, lgth1, doublework, intwork, 1 );
+ if( localhom )
+ imp_match_out_vead_tate( initverticalw, 0, lgth1 ); // 060306
+
+ match_calc( n_dynamicmtx, currentw, cpmx1, cpmx2, 0, lgth2, doublework, intwork, 1 );
+ if( localhom )
+ imp_match_out_vead( currentw, 0, lgth2 ); // 060306
+#if 0 // -> tbfast.c
+ if( localhom )
+ imp_match_calc( n_dynamicmtx, currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 );
+
+#endif
+
+ for( j=1; j<lgth2+1; j++ )
+ {
+ pfac = calcpfac_gap_noidatend( gaplen1, gaplen2, j, 0, j, seq1[0], seq2[0], 0 );
+// reporterr( "computing initial end gap penalty for %c-%c, i=0, j=%d, pfac=%f\n", seq1[0][0], seq2[0][j], j, pfac );
+// reporterr( "%c-%c, i=0, j=%d, currentw[j]=%f -> ", seq1[0][0], seq2[0][j], j, currentw[j] );
+ currentw[j] += fpenalty * pfac; // tekitou
+// reporterr( " %f\n", currentw[j] );
+ }
+ for( i=1; i<lgth1+1; i++ )
+ {
+ pfac = calcpfac_gap_noidatend( gaplen2, gaplen1, i, 0, i, seq2[0], seq1[0], 0 );
+// reporterr( "computing initial end gap penalty for %c-%c, i=%d, j=0, pfac=%f\n", seq1[0][i], seq2[0][0], i, pfac );
+ initverticalw[i] += fpenalty * pfac; // tekitou
+ }
+
+
+
+ for( j=1; j<lgth2+1; ++j )
+ {
+
+
+#if ALGZGAP
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2[j-1]; mp[j] = 0;;
+#else
+ pfac = calcpfac_gapex_noidatend( gaplen2, gaplen1, j, 1, j, seq2[0], seq1[0], 1 );
+#if DEBUG
+ reporterr( "%c-%c, INITIAL jgap extension check, pfac = %f\n\n", seq1[0][j], '=', pfac );
+#endif
+ m[j] = currentw[j-1] + fpenalty * pfac;
+ mp[j] = 0;
+#endif
+ }
+ if( lgth2 == 0 )
+ lastverticalw[0] = 0.0; // Falign kara yobaretatoki kounarukanousei ari
+ else
+ lastverticalw[0] = currentw[lgth2-1];
+
+ if( tailgp ) lasti = lgth1+1; else lasti = lgth1;
+ lastj = lgth2+1;
+
+
+ for( i=1; i<lasti; i++ )
+ {
+// reporterr( "i = %d\n", i );
+
+// reporterr( "err1? i=%d/%d\n", i, lgth1 );
+#ifdef enablemultithread
+// fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+ if( chudanpt && *chudanpt != chudanref )
+ {
+ cleargaplens( gaplens );
+// fprintf( stderr, "\n\n## CHUUDAN!!! S\n" );
+ *chudanres = 1;
+ return( -1.0 );
+ }
+#endif
+
+
+ wtmp = previousw;
+ previousw = currentw;
+ currentw = wtmp;
+
+ previousw[0] = initverticalw[i-1];
+
+#if 1
+ gaplentmp = gaplen1jprev;
+ gaplen1jprev = gaplen1jcurr;
+ gaplen1jcurr = gaplentmp;
+
+ gaplentmp = gaplen2jprev;
+ gaplen2jprev = gaplen2jcurr;
+ gaplen2jcurr = gaplentmp;
+
+#if DEBUG
+ reporterr( "Entering a small j loop, i=%d\n", i );
+ for( j=1; j<lgth2+1; j++ )
+ {
+ reporterr( "before j loop, i=%d, gaplen2jcurr[%d] = \n", i, j );
+ showgaplen( gaplen2jcurr[j], 100 );
+ reporterr( "\n" );
+ reporterr( "before j loop, i=%d, gaplen2prev[%d] = \n", i, j );
+ showgaplen( gaplen2jprev[j], 100 );
+ reporterr( "\n" );
+ }
+#endif
+#else
+
+ reporterr( "Entering a small j loop, i=%\n", i );
+ for( j=1; j<lgth2+1; j++ )
+ {
+// addnewgaplen( gaplen1jprev[j], gaplen1jcurr[j], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jprev[j], gaplen2jcurr[j], gaplen2, lgth2, -1, 0 );
+ reporterr( "err1? j=%d/%d\n", j, lgth2 );
+ copygaplencompactx( gaplen1jprev[j-1], gaplen1jcurr[j-1], lgth1, -1, 0, i-1, i-1 ); // TEST
+ reporterr( "err1? j=%d/%d\n", j, lgth2 );
+ copygaplencompactx( gaplen2jprev[j-1], gaplen2jcurr[j-1], lgth2, -1, 0, j-1, j-1 ); // TETS
+#if DEBUG
+ reporterr( "before j loop, i=%d, gaplen2jcurr[%d] = \n", i, j );
+ showgaplen( gaplen2jcurr[j], 100 );
+ reporterr( "\n" );
+ reporterr( "before j loop, i=%d, gaplen2prev[%d] = \n", i, j );
+ showgaplen( gaplen2jprev[j], 100 );
+ reporterr( "\n" );
+#endif
+ }
+#endif
+
+// reporterr( "err2? i=%d/%d\n", i, lgth1 );
+
+// duplicategaplencompactx( gaplen1icurr[i], gaplen1, lgth1, i, i+1 ); //originally 0, i+1
+// reporterr( "gaplen+0=\n");
+// showgaplen( gaplen1, 10 );
+// reporterr( "i=%d, lgth1=%d, lgth1-i=%d, gaplen+i-1=\n", i, lgth1, lgth1-i );
+// showgaplen( gaplen1+i-1, 100 );
+ duplicategaplencompactx( gaplen1icurr[i], gaplen1+i, lgth1-i, 0, 1 ); // half!!
+// duplicategaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, lgth2 ); // KOKO
+ duplicategaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, 0 ); // test
+ copygaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, i, 0, 0 ); // IRU? TEST
+
+
+
+// duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1, lgth1, i, i+1 ); //originally 0, i+1
+ duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 1 ); //half
+// duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, lgth2 ); // ORIGINALLY, 0, lgth2
+ duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, 0 ); // ORIGINALLY, 0, lgth2
+// copygaplenrestricted( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, lgth2, 0, 0 ); // IRU? // TEST
+ copygaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // IRU? // TEST
+
+ extendgaplencompactx( gaplen1jprev[0], gaplen1, i ); // ???
+
+
+// addnewgaplen( gaplen1jprev[0], gaplen1icurr[i-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jprev[0], gaplen2icurr[i-1], gaplen2, lgth2, -1, 0 );
+// copygaplenrestricted( gaplen1jprev[0], gaplen1icurr[i-1], lgth1, -1, 0, i, i ); // i-1, i da to omou.
+ copygaplencompactx( gaplen1jprev[0], gaplen1icurr[i-1], lgth1-i, -1, 0, i, 1 ); // half? lgth1-i?
+// copygaplenrestricted( gaplen2jprev[0], gaplen2icurr[i-1], lgth2, -1, 0, 0, 0 );
+ copygaplencompactx( gaplen2jprev[0], gaplen2icurr[i-1], lgth2-j, -1, 0, 0, 0 ); // half?? lgth2-j?
+
+
+ match_calc( n_dynamicmtx, currentw, cpmx1, cpmx2, i, lgth2, doublework, intwork, 0 );
+#if XXXXXXX
+fprintf( stderr, "\n" );
+fprintf( stderr, "i=%d\n", i );
+fprintf( stderr, "currentw = \n" );
+for( j=0; j<lgth2; j++ )
+{
+ fprintf( stderr, "%5.2f ", currentw[j] );
+}
+fprintf( stderr, "\n" );
+#endif
+ if( localhom )
+ {
+// fprintf( stderr, "Calling imp_match_calc (o) lgth = %d, i = %d\n", lgth1, i );
+#if 0
+ imp_match_out_vead( currentw, i, lgth2 );
+#else
+ imp_match_out_vead( currentw, i, lgth2 );
+#endif
+ }
+#if XXXXXXX
+fprintf( stderr, "\n" );
+fprintf( stderr, "i=%d\n", i );
+fprintf( stderr, "currentw = \n" );
+for( j=0; j<lgth2; j++ )
+{
+ fprintf( stderr, "%5.2f ", currentw[j] );
+}
+fprintf( stderr, "\n" );
+#endif
+ currentw[0] = initverticalw[i];
+
+#if 0
+ fprintf( stderr, "%c ", seq1[0][i] );
+ for( j=0; j<lgth2+1; j++ )
+ {
+ fprintf( stderr, "%5.0f ", currentw[j] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+
+// mi = previousw[0] + ogcp2[1]; mpi = 0;
+
+
+
+
+
+#if ALGZGAP
+ mi = previousw[0] + ogcp2[1] * gapfreq1[i-1]; mpi=0;
+#else
+ pfac = calcpfac_gapex_noidatend( gaplen1, gaplen2, i, 1, i, seq1[0], seq2[0], 1 );
+#if DEBUG
+ reporterr( "%c-%c, INITIAL igap extension check, pfac = %f\n\n", '=', seq2[0][j], pfac );
+#endif
+ mi = previousw[0] + fpenalty * pfac;
+ mpi=0;
+#endif
+ ijppt = ijp[i] + 1;
+ mjpt = m + 1;
+ prept = previousw;
+ curpt = currentw + 1;
+ mpjpt = mp + 1;
+
+
+ if( trywarp )
+ {
+ prevwmrecordspt = prevwmrecords;
+ wmrecordspt = wmrecords+1;
+ wmrecords1pt = wmrecords;
+ warpipt = warpi + 1;
+ warpjpt = warpj + 1;
+ }
+
+
+
+
+// reporterr( "\n\ni=%d, %c\n", i, seq1[0][i] );
+ for( j=1; j<lastj; j++ )
+ {
+
+#if DEBUG
+ reporterr( "***** %c%d-%c%d ******* \n", seq1[0][i], i, seq2[0][j], j );
+ reporterr( "mpi=%d (%c), *mpjpt=%d (%c)\n", mpi, seq2[0][mpi], *mpjpt, seq1[0][*mpjpt] );
+#endif
+
+
+// Add only the portion that is needed
+#if USEGAPLENMTX
+ extendgaplenpartly( gaplen1mtx[i-1][mpi], gaplen1, i, i );
+ extendgaplenpartly( gaplen2mtx[i-1][mpi], gaplen2, j, j );
+ extendgaplenpartly( gaplen1mtx[*mpjpt][j-1], gaplen1, i, i );
+ extendgaplenpartly( gaplen2mtx[*mpjpt][j-1], gaplen2, j, j );
+ extendgaplenpartly( gaplen1mtx[i-1][j-1], gaplen1, i, i );
+ extendgaplenpartly( gaplen2mtx[i-1][j-1], gaplen2, j, j );
+#endif
+#if USEGAPLENHALF
+ extendgaplenpartly( gaplen1half[i-1][mpi], gaplen1+i-1, 1, 1 );
+ extendgaplenpartly( gaplen2half[i-1][mpi], gaplen2+mpi, j-mpi, j-mpi );
+ extendgaplenpartly( gaplen1half[*mpjpt][j-1], gaplen1+*mpjpt, i-*mpjpt, i-*mpjpt );
+ extendgaplenpartly( gaplen2half[*mpjpt][j-1], gaplen2+j-1, 1, 1 );
+ extendgaplenpartly( gaplen1half[i-1][j-1], gaplen1+i-1, 1, 1 );
+ extendgaplenpartly( gaplen2half[i-1][j-1], gaplen2+j-1, 1, 1 );
+
+#endif
+
+// reporterr( "extending gaplen1icurr\n" );
+ extendgaplencompactx( gaplen1icurr[i-1], gaplen1+i-1, 1 ); // iruhazu
+// reporterr( "extending gaplen2icurr\n" );
+ extendgaplencompactx( gaplen2icurr[i-1], gaplen2, j ); // iruhazu
+// reporterr( "extending gaplen1jprev[j-1], j-1=%d\n", j-1 );
+ extendgaplencompactx( gaplen1jprev[j-1], gaplen1, i );
+// reporterr( "extending gaplen1jcurr, j-1=%d\n", j-1 );
+ extendgaplencompactx( gaplen1jcurr[j-1], gaplen1, i );
+// reporterr( "extending gaplen2jprev\n" );
+ extendgaplencompactx( gaplen2jprev[j-1], gaplen2+j-1, 1 );
+// reporterr( "extending gaplen2jcurr\n" );
+ extendgaplencompactx( gaplen2jcurr[j-1], gaplen2+j-1, 1 );
+// reporterr( "extending gaplen1jbest[j-1]\n" );
+ extendgaplencompactx( gaplen1jbest[j-1], gaplen1, i );
+// reporterr( "extending gaplen1jbestkamo[j-1]\n" );
+ extendgaplencompactx( gaplen1jbestkamo[j-1], gaplen1, i );
+// reporterr( "extending gaplen1jprev[mpi], j-1=%d\n", j-1 );
+ extendgaplencompactx( gaplen1jprev[mpi], gaplen1, i ); // full
+// reporterr( "extending gaplen2jprev[mpi]\n" );
+ extendgaplencompactx( gaplen2jprev[mpi], gaplen2+mpi, j-mpi ); // half
+// reporterr( "extending gaplen1ibestkamo[i-1]\n" );
+ extendgaplencompactx( gaplen1ibestkamo[i-1], gaplen1+i-1, 1 );
+// reporterr( "extending gaplen2ibestkamo[i-1]\n" );
+ extendgaplencompactx( gaplen2ibestkamo[i-1], gaplen2, j );
+
+
+#if DEBUG
+ reporterr( "at the head of j loop, i,j=%d,%d, gaplen2jcurr[j]=\n", i, j );
+ showgaplen( gaplen2jcurr[j], 100 );
+
+ reporterr( "at the head of j loop, i,j=%d,%d, gaplen2jcurr[j-1]=\n", i, j );
+ showgaplen( gaplen2jcurr[j-1], 100 );
+
+
+ reporterr( "at the head of j loop, i,j=%d,%d, gaplen2jprev[j]=\n", i, j );
+ showgaplen( gaplen2jprev[j], 100 );
+
+ reporterr( "at the head of j loop, i,j=%d,%d, gaplen2jprev[j-1]=\n", i, j );
+ showgaplen( gaplen2jprev[j-1], 100 );
+#endif
+
+
+#ifdef xxxenablemultithread
+// fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+ if( chudanpt && *chudanpt != chudanref )
+ {
+ cleargaplens( gaplens );
+// fprintf( stderr, "\n\n## CHUUDAN!!! S\n" );
+ *chudanres = 1;
+ return( -1.0 );
+ }
+#endif
+#if USEGAPLENHALF
+// check the case i==248, j==80
+#if DEBUG80
+ if( j==80 )
+ {
+ reporterr( "When i==%d, j==%d,\n", i, j );
+ reporterr( "gaplen2jprev[j-1=%d]=\n", j-1 );
+ showgaplen( gaplen2jprev[j-1], lgth1 );
+ reporterr( "gaplen2half[i-1=%d][j-1=%d]=\n", i-1, j-1 );
+ showgaplen( gaplen2half[i-1][j-1], lgth1 );
+ }
+ if( j==79 )
+ {
+ reporterr( "When i==%d, j==%d,\n", i, j );
+ reporterr( "gaplen2jprev[j-1=%d]=\n", j-1 );
+ showgaplen( gaplen2jprev[j-1], lgth1 );
+ reporterr( "gaplen2half[i-1=%d][j-1=%d]=\n", i-1, j-1 );
+ showgaplen( gaplen2half[i-1][j-1], lgth1 );
+ }
+#endif
+#endif
+
+
+// pfac = calcpfac( gaplen1jprev[j-1], gaplen2jprev[j-1], i, j, seq1[0], seq2[0] );
+//reporterr( "#### COMPACT, i,j=%d,%d\n", i, j );
+ pfac = calcpfacnoidatend( gaplen1jprev[j-1], gaplen2jprev[j-1], i, 1, seq1[0], seq2[0]+j, one ); // 1j->full, 2j->half
+#if USEGAPLENMTX
+//reporterr( "#### FULL, i,j=%d,%d\n", i, j );
+ pfactmp = calcpfac( gaplen1mtx[i-1][j-1], gaplen2mtx[i-1][j-1], i, j, seq1[0], seq2[0], one );
+#endif
+#if USEGAPLENHALF
+//reporterr( "#### HALF, i,j=%d/%d,%d/%d\n", i, lgth1, j, lgth2 );
+// showgaplen( gaplen2half[i-1][j-1], lgth2-j );
+ pfactmp = calcpfac( gaplen1half[i-1][j-1], gaplen2half[i-1][j-1], 1, 1, seq1[0]+i, seq2[0]+j, zero );
+#endif
+#if USEGAPLENMTX + USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(straight) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j );
+ PFACERROR = 1;
+ exit( 1 );
+ }
+#endif
+//if( i==50 && j==135 ) exit( 1 );
+
+
+// reporterr( "i,j=%d,%d, *prept = %f\n", i, j, *prept );
+
+#if ALGZSTRAIGHT
+ wm = *prept; // Machigai!!
+#else
+ wm = *prept + fpenalty * pfac;
+#endif
+ *ijppt = 0;
+
+
+#if DEBUG
+ if( i == j )
+ {
+ fprintf( stderr, "\n i=%d, j=%d %c, %c ", i, j, seq1[0][i], seq2[0][j] );
+ fprintf( stderr, "%5.0f, pfac for straight =%f\n", wm, pfac );
+ }
+#endif
+ newgaplen = j-mpi-1;
+
+
+//if( i == 53 && j == 93 ) exit( 1 );
+
+
+
+
+// pfac = calcpfac_gap_incomplete( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], newgaplen, i, j, seq1[0], seq2[0], 0 ); // i-1
+ pfac = calcpfac_gap_noidatend( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], newgaplen, 1, j, seq1[0]+i-1, seq2[0], 0 ); // i-1
+#if USEGAPLENMTX
+ pfactmp = calcpfac_gap_incomplete( gaplen1mtx[i-1][mpi], gaplen2mtx[i-1][mpi], newgaplen, i, j, seq1[0], seq2[0], 1 );
+#endif
+#if USEGAPLENHALF
+ pfactmp = calcpfac_gap_incomplete( gaplen1half[i-1][mpi], gaplen2half[i-1][mpi], newgaplen, 1, j-mpi, seq1[0]+i-1, seq2[0]+mpi, 1 );
+#endif
+#if USEGAPLENMTX || USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(igap) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j );
+ PFACERROR = 1;
+ }
+#endif
+
+
+#if DEBUG
+ reporterr( "%c-%c pfac for igap end incomplete = %f\n", seq1[0][i], seq2[0][j], pfac );
+ reporterr( "mi when igap end checking = %f\n", mi );
+ reporterr( "wm = %f, mi+fpenalty*pfac=%f\n", wm, mi+fpenalty*pfac );
+#endif
+
+
+#if ALGZGAP
+ if( (g=mi+*fgcp2pt*gf1va) > wm )
+#else
+ if( (g=mi+fpenalty*pfac) > wm )
+#endif
+ {
+ wm = g;
+ *ijppt = -( j - mpi );
+#if DEBUG80
+ reporterr( "Selected as igap end! wm = %f, mi = %f\n", wm, mi );
+ fprintf( stderr, "Jump from %d-%d (%c-%c) to %d (%c-%c)!\n", i, j, seq1[0][i], seq2[0][j], mpi, seq1[0][i-1], seq2[0][mpi] );
+#endif
+ }
+
+
+#if 0
+ fprintf( stderr, "%5.0f->", wm );
+#endif
+// if( (g=*mjpt+ fgcp1va* *gf2pt) > wm )
+
+#if 0
+// reporterr( "Checking %c, (current pos = %c)\n", seq2[0][j+1], seq2[0][j] );
+ sfac = 1.0;
+ for( k=0; gaplen2[j+1]&&(gl=gaplen2[j+1][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou
+ {
+// reporterr( ".len = %d, .relend = %d\n", gaplen2[j+1][k].len, gaplen2[j+1][k].relend );
+ if( gl - 1 == gaplen2[j+1][k].relend )
+ {
+ sfac -= gaplen2[j+1][k].freq;
+// reporterr( "Hit! sfac = %f\n", sfac );
+ }
+ }
+ sfac2 = 1.0;
+ for( k=0; gaplen1[i+1]&&(gl=gaplen1[i+1][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou
+ if( gaplen1[i+1][k].relend != -1 ) sfac2 -= gaplen1[i+1][k].freq;
+ sfac *= sfac2;
+#else
+// sfac = 0.0;
+#endif
+
+
+
+#if ALGZGAP
+ if( (g=*prept+*ogcp2pt*gf1vapre) >= mi )
+#else
+// if( (g=*prept + fpenalty * sfac ) >= mi )
+ if( (g=*prept ) >= mi )
+#endif
+ {
+// mpibk = mpi;
+// mi = g - fpenalty * sfac;
+ mi = g;
+ mpi = j-1;
+#if DEBUG80
+ reporterr( "Selected as igap start! %c%d-%c%d, mi=%f, g=%f\n", seq1[0][i-1], i-1, seq2[0][mpi], mpi, mi, g );
+#endif
+
+#if FREEFREQUENTLY
+// freegaplenpartly( gaplen1ibestkamo[i-1], 0, i-1 );
+ freegaplenpartly( gaplen2ibestkamo[i-1], j-3, j-2 );
+#endif
+// freegaplenpartly( gaplen1jprev[mpibk], 0, lgth2 ); // full
+// freegaplenpartly( gaplen2jprev[mpibk], 0, lgth2-mpibk ); // half
+// if( gaplen1jprev[mpibk] ) FreeGaplenMtx( gaplen1jprev[mpibk], 0 );
+// gaplen1jprev[mpibk] = NULL;
+// if( gaplen2jprev[mpibk] ) FreeGaplenMtx( gaplen2jprev[mpibk], 0 );
+// gaplen2jprev[mpibk] = NULL;
+
+
+// addnewgaplen( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 );
+// copygaplenrestricted( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // i-1, i
+ copygaplencompactx( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], lgth1, -1, 0, 1, i ); // half
+// copygaplenrestricted( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); // mpi, j
+ copygaplencompactx( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], lgth2, -1, 0, j, 1 ); //half
+
+
+ }
+
+
+
+
+
+
+// reporterr( "g=%f, *prept=%f, mi=%f\n", g, *prept, mi );
+
+
+#if USE_PENALTY_EX
+ mi += fpenalty_ex;
+#endif
+
+#if ALGZGAP
+ pfac = 0.0; // CHUUI!
+#else
+
+// pfac = calcpfac_gapex( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], i, j, j-mpi, seq1[0], seq2[0], 1 ); // i-1
+ pfac = calcpfac_gapex_noidatend( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], 1, j, j-mpi, seq1[0]+i, seq2[0], 1 ); // 1ibest->half, 2ibest->full
+#if USEGAPLENMTX
+ pfactmp = calcpfac_gapex( gaplen1mtx[i-1][mpi], gaplen2mtx[i-1][mpi], i, j, j-mpi, seq1[0], seq2[0], 1 );
+#endif
+#if USEGAPLENHALF
+ pfactmp = calcpfac_gapex( gaplen1half[i-1][mpi], gaplen2half[i-1][mpi], 1, j-mpi, j-mpi, seq1[0]+i, seq2[0]+mpi, 1 );
+#endif
+#if USEGAPLENMTX || USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(igapex) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j );
+ PFACERROR = 1;
+ }
+#endif
+
+
+
+
+
+
+
+#if DEBUG
+ reporterr( "%c-%c, igap extension check, pfac = %f\n\n", '=', seq2[0][j], pfac );
+#endif
+#endif
+// reporterr( "mi = %f -> ", mi );
+ mi += fpenalty * pfac;
+// reporterr( "mi = %f\n", mi );
+
+
+// reporterr( "using %d-%d, %d, %d\n", *mpjpt, j-1, i, j );
+ newgaplen = i-*mpjpt-1;
+// pfac = calcpfac_gap_incomplete( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], newgaplen, j, i, seq2[0], seq1[0], 0 ); // j-1 deha???
+
+
+ pfac = calcpfac_gap_noidatend( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], newgaplen, 1, i, seq2[0]+j-1, seq1[0], 1 ); // 2jbestkamo->half, 1jbestkamo->full
+#if USEGAPLENMTX
+ pfactmp = calcpfac_gap_incomplete( gaplen2mtx[*mpjpt][j-1], gaplen1mtx[*mpjpt][j-1], newgaplen, j, i, seq2[0], seq1[0], 1 );
+#endif
+#if USEGAPLENHALF
+ pfactmp = calcpfac_gap_incomplete( gaplen2half[*mpjpt][j-1], gaplen1half[*mpjpt][j-1], newgaplen, 1, i-*mpjpt, seq2[0]+j-1, seq1[0]+*mpjpt, 1 );
+#endif
+#if USEGAPLENMTX || USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(jgap) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j );
+// exit( 1 );
+ PFACERROR = 1;
+ }
+#endif
+
+#if ALGZGAP
+ if( (g=*mjpt+ fgcp1va* *gf2pt) > wm )
+#else
+ if( (g=*mjpt + fpenalty*pfac) > wm )
+#endif
+ {
+ wm = g;
+ *ijppt = +( i - *mpjpt );
+
+
+#if FREEFREQUENTLY
+ freegaplenpartly( gaplen1jbest[j-1], i-3, i-2 );
+// freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 );
+#endif
+
+
+#if DEBUG
+ reporterr( "Selected as jgap end!, pfac = %f\n", pfac );
+ fprintf( stderr, "Jump from %d (%c) to %d (%c)!\n", j, seq1[0][j], *mpjpt, seq1[0][*mpjpt] );
+#endif
+// addnewgaplen( gaplen1jbest[j-1], gaplen1jbestkamo[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], gaplen2, lgth2, -1, 0 );
+ copygaplencompactx( gaplen1jbest[j-1], gaplen1jbestkamo[j-1], lgth1, -1, 0, i, i );// *mpjpt, i
+// copygaplenrestricted( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], lgth2, -1, 0, j, j ); // j-1, j
+ copygaplencompactx( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], lgth2, -1, 0, 1, 1 ); // half!
+
+
+
+
+ }
+
+
+// extendgaplenpartly( gaplen1jbest[j-1], gaplen1, i, i ); // tmptmptmp
+// extendgaplenpartly( gaplen2jbest[j-1], gaplen2, 0, 0 ); // tmptmptmp
+
+#if 0
+ sfac = 1.0;
+ for( l=0; gaplen1[i+1]&&(gl=gaplen1[i+1][l].len)!=-1; l++ ) // ososugi! hash ni atode henkou
+ if( gl - 1 == gaplen1[i+1][l].relend ) sfac -= gaplen1[i+1][l].freq;
+ sfac2 = 1.0;
+ for( k=0; gaplen2[j+1]&&(gl=gaplen2[j+1][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou
+ if( gaplen2[j+1][k].relend != -1 ) sfac2 -= gaplen2[j+1][k].freq;
+ sfac *= sfac2;
+#else
+// sfac = 0.0;
+#endif
+
+#if DEBUG
+ reporterr( " (jgap start check i=%d) -> *prept=%f, *mjpt=%f\n", i, seq1[0][i], seq2[0][j], *prept, *mjpt );
+#endif
+
+#if ALGZGAP
+ if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt )
+#else
+// if( (g=*prept + fpenalty * sfac ) >= *mjpt )
+ if( (g=*prept ) >= *mjpt )
+#endif
+ {
+// *mjpt = g - fpenalty * sfac;
+ *mjpt = g;
+ *mpjpt = i-1;
+#if DEBUG
+ reporterr( "Selected as jgap start!\n" );
+#endif
+
+
+#if FREEFREQUENTLY
+ freegaplenpartly( gaplen1jbestkamo[j-1], i-3, i-2 );
+// freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 );
+#endif
+
+
+// addnewgaplen( gaplen1jbestkamo[j-1], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 );
+// reporterr( "copying gaplen1jbestkamo[%d-1] from galpen1jprev, j=%d, i=%d\n", j, j, i );
+ copygaplencompactx( gaplen1jbestkamo[j-1], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // *mpjpt, i
+// copygaplenrestricted( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); // j-1, j
+// copygaplencompactx( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2, -1, 0, j, 1 ); // half!
+// reporterr( "copying gaplen2jbestkamo[%d-1] from galpen2jprev\n", j );
+ copygaplencompactx( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2-j, -1, 0, 1, 1 ); // ryouhou half!
+
+
+// if( j==2 && i==1 ) exit( 1 );
+
+
+
+ }
+
+// extendgaplenpartly( gaplen1ibestkamo[i-1], gaplen1, 0, 0 ); // tmptmptmp
+// extendgaplenpartly( gaplen2ibestkamo[i-1], gaplen2, j, j ); // tmptmptmp
+
+
+// extendgaplenpartly( gaplen1jbestkamo[j-1], gaplen1, i, i ); // tmptmptmp
+// extendgaplenpartly( gaplen2jbestkamo[j-1], gaplen2, 0, 0 ); // tmptmptmp
+
+
+#if USE_PENALTY_EX
+ m[j] += fpenalty_ex;
+#endif
+
+#if ALGZGAP
+ pfac = 0.0;
+#else
+
+// pfactmp = calcpfac_gapex( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], j, i, i-*mpjpt, seq2[0], seq1[0], 0 ); // j-1
+ pfactmp = calcpfac_gapex_noidatend( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], 1, i, i-*mpjpt, seq2[0]+j, seq1[0], 0 ); // 2jbestkamo->half, 1jbestkamo->full
+#if USEGAPLENMTX
+ pfac = calcpfac_gapex( gaplen2mtx[*mpjpt][j-1], gaplen1mtx[*mpjpt][j-1], j, i, i-*mpjpt, seq2[0], seq1[0], 0 );
+#endif
+#if USEGAPLENHALF
+ pfac = calcpfac_gapex( gaplen2half[*mpjpt][j-1], gaplen1half[*mpjpt][j-1], 1, i-*mpjpt, i-*mpjpt, seq2[0]+j, seq1[0]+*mpjpt, 0 );
+#endif
+#if USEGAPLENMTX || USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(jgapex) pfac=%f, but pfactmp=%f (i,j=%d,%d) diff=%f\n", pfac, pfactmp, i, j, pfac-pfactmp );
+// exit( 1 );
+ PFACERROR = 1;
+ }
+#endif
+ pfac = pfactmp;
+#if DEBUG
+ reporterr( "%c-%c, jgap extension check (j=%d), pfac = %f\n", seq1[0][i], '=', j, pfac );
+#endif
+#endif
+ m[j] += fpenalty * pfac;
+
+
+
+ if( trywarp )
+ {
+#if USE_PENALTY_EX
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai
+#else
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai
+#endif
+ {
+// fprintf( stderr, "WARP!!\n" );
+ if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+ {
+ *ijppt = warpbase + warpn - 1;
+ }
+ else
+ {
+ *ijppt = warpbase + warpn;
+ warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+ warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+ warpis[warpn] = prevwarpi[j-1];
+ warpjs[warpn] = prevwarpj[j-1];
+ warpn++;
+ }
+ wm = g;
+ }
+
+#if 0
+ fprintf( stderr, "%5.0f ", wm );
+#endif
+ curm = *curpt + wm;
+
+ if( *wmrecords1pt > *wmrecordspt )
+ {
+ *wmrecordspt = *wmrecords1pt;
+ *warpipt = *(warpipt-1);
+ *warpjpt = *(warpjpt-1);
+ }
+ if( curm > *wmrecordspt )
+ {
+ *wmrecordspt = curm;
+ *warpipt = i;
+ *warpjpt = j;
+ }
+ wmrecordspt++;
+ wmrecords1pt++;
+ warpipt++;
+ warpjpt++;
+ }
+
+#if DEBUG
+ reporterr( "extention-x 1j???, before extention-x, j=%d\n", j );
+ showgaplen( gaplen1jcurr[j], 100 );
+#endif
+ extendgaplencompactx( gaplen1jcurr[j], gaplen1, i );
+
+#if DEBUG
+ reporterr( "after extention-x\n" );
+ showgaplen( gaplen1jcurr[j], 100 );
+ reporterr( "extention-x 2j???\n" );
+#endif
+ extendgaplencompactx( gaplen2jcurr[j], gaplen2+j, 0 );
+
+
+#if 1
+ if( *ijppt < 0 )
+ {
+#if DEBUG
+ reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-1, j+*ijppt, i, seq1[0][i], j, seq2[0][j], *ijppt );
+ reporterr( "Inserting %d gaps to gaplen1 and copying gaplen2 (%c%d-%c%d)\n", -*ijppt-1, seq1[0][i], i, seq2[0][j], j );
+#endif
+#if USEGAPLENMTX
+// addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-1][j+*ijppt], gaplen1, lgth1, i, -*ijppt-1 );
+// addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-1][j+*ijppt], gaplen2, lgth2, -1, 0 );
+ copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-1][j+*ijppt], lgth1, i, -*ijppt-1, i, i ); // 0, lgth1
+ copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-1][j+*ijppt], lgth2, -1, 0, j, j ); // 0, lgth2
+#endif
+#if USEGAPLENHALF
+ copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-1][j+*ijppt], lgth1, 0, -*ijppt-1, 0, 0, 1, 1 ); // 0, lgth1
+ copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-1][j+*ijppt], lgth2, -1, 0, 0, 0, -*ijppt, -*ijppt ); // 0, lgth2
+#endif
+// addnewgaplen( gaplen1jcurr[j], gaplen1jprev[j+*ijppt], gaplen1, lgth1, i, -*ijppt-1 );
+// addnewgaplen( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], gaplen2, lgth2, -1, 0 );
+// reporterr( "copying gaplen1jcurr from gaplen1jbest, with a %d insertion\n", -*ijppt-1 );
+ copygaplencompactx( gaplen1jcurr[j], gaplen1jprev[j+*ijppt], lgth1, i, -*ijppt-1, i, i ); // scope: i+*ijppt+1, i ?
+// reporterr( "copy end\n" );
+// copygaplenrestricted( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], lgth2, -1, 0, j, j );
+ copygaplencompactx( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], lgth2, -1, 0, 0, -*ijppt ); // half! ryouho zureteru
+ }
+ else if( *ijppt > 0 )
+ {
+#if DEBUG
+ reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-*ijppt, j-1, i, seq1[0][i], j, seq2[0][j], *ijppt );
+ reporterr( "Copying gaplen1 inserting %d gaps to gaplen2 (%c%d-%c%d)\n", *ijppt-1, seq1[0][i], i, seq2[0][j], j );
+#endif
+#if USEGAPLENMTX
+// addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-*ijppt][j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-*ijppt][j-1], gaplen2, lgth2, j, *ijppt-1 );
+ copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-*ijppt][j-1], lgth1, -1, 0, i, i ); // 0, lgth1
+ copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-*ijppt][j-1], lgth2, j, *ijppt-1, j, j ); // 0, lgth2
+#endif
+#if USEGAPLENHALF
+ copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-*ijppt][j-1], lgth1, -1, 0, 0, 0, *ijppt, *ijppt ); // 0, lgth1
+ copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-*ijppt][j-1], lgth2, 0, *ijppt-1, 0, 0, 1, 1 ); // 0, lgth2
+#endif
+// addnewgaplen( gaplen1jcurr[j], gaplen1jbest[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jcurr[j], gaplen2jbest[j-1], gaplen2, lgth2, j, *ijppt-1 );
+ copygaplencompactx( gaplen1jcurr[j], gaplen1jbest[j-1], lgth1, -1, 0, i, i );
+// copygaplenrestricted( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, j, *ijppt-1, j, j ); // j-*ijppt+1?
+// copygaplenrestricted_zurasu( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, 0, *ijppt-1, 0, 0, j, j ); // 2jcurr->half, but 2jbest->full, imanotokoro
+ copygaplencompactx( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, 0, *ijppt-1, 0, 1 ); //ryouhou half
+
+ }
+ else
+#endif
+ {
+#if DEBUG
+ reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-1, j-1, i, seq1[0][i], j, seq2[0][j], *ijppt );
+ reporterr( "Copying gaplen1 and gaplen2 (%c%d-%c%d)\n", seq1[0][i], i, seq2[0][j], j );
+#endif
+#if USEGAPLENMTX
+// addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-1][j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-1][j-1], gaplen2, lgth2, -1, 0 );
+ copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-1][j-1], lgth1, -1, 0, i, i ); // 0, lgth1
+ copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-1][j-1], lgth2, -1, 0, j, j ); // 0, lgth2
+#endif
+#if USEGAPLENHALF
+ copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-1][j-1], lgth1, -1, 0, 0, 0, 1, 1 ); // 0, lgth1
+ copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-1][j-1], lgth2, -1, 0, 0, 0, 1, 1 ); // 0, lgth2
+#endif
+// addnewgaplen( gaplen1jcurr[j], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jcurr[j], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 );
+ copygaplencompactx( gaplen1jcurr[j], gaplen1jprev[j-1], lgth1, -1, 0, i, i );
+// copygaplenrestricted( gaplen2jcurr[j], gaplen2jprev[j-1], lgth2, -1, 0, j, j );
+ copygaplencompactx( gaplen2jcurr[j], gaplen2jprev[j-1], lgth2, -1, 0, 0, 1 ); // half
+ }
+
+#if DEBUG
+ reporterr( "at the end of j loop, gaplen1jcurr[%d] = \n", j );
+ showgaplen( gaplen1jcurr[j], 100 );
+ reporterr( "at the end of j loop, gaplen1prev[%d] = \n", j );
+ showgaplen( gaplen1jprev[j], 100 );
+#endif
+
+
+
+#if 1
+ freegaplenpartly( gaplen1jcurr[j-1], i-3, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // half!
+ freegaplenpartly( gaplen1jbestkamo[j-1], i-3, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); // -1 dehanaku -2??
+ freegaplenpartly( gaplen1jbest[j-1], i-3, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); // -1 dehanaku -2??
+#else
+ freegaplenpartly( gaplen1jprev[j-1], 0, i-2 ); // -1 dehanaku -2??
+ freegaplenpartly( gaplen1jcurr[j-1], 0, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // half!
+ freegaplenpartly( gaplen1jbestkamo[j-1], 0, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); // -1 dehanaku -2??
+ freegaplenpartly( gaplen1jbest[j-1], 0, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); // -1 dehanaku -2??
+#endif
+
+
+#if USEGAPLENMTX
+// freegaplenpartly( gaplen1mtx[i-1][j-1], 0, i-2 );
+// freegaplenpartly( gaplen2mtx[i-1][j-1], 0, j-2 );
+#endif
+
+
+ *curpt++ += wm;
+ ijppt++;
+ mjpt++;
+ prept++;
+ mpjpt++;
+ }
+ lastverticalw[i] = currentw[lgth2-1];
+
+#if 1
+// freegaplenpartly( gaplen1icurr[i-1], i-1, i-1 );
+ freegaplenpartly( gaplen1icurr[i-1], 0, lgth1-i );
+ freegaplenpartly( gaplen2icurr[i-1], 0, lgth2 );
+// freegaplenpartly( gaplen1ibestkamo[i-1], i-1, i-1 );
+ freegaplenpartly( gaplen1ibestkamo[i-1], 0, lgth1-i );
+ freegaplenpartly( gaplen2ibestkamo[i-1], 0, lgth2 );
+#endif
+
+ if( trywarp )
+ {
+ fltncpy( prevwmrecords, wmrecords, lastj );
+ intncpy( prevwarpi, warpi, lastj );
+ intncpy( prevwarpj, warpj, lastj );
+ }
+#if 0
+ fprintf( stderr, "i=%d, %15.5f \n", i, wm );
+#endif
+//if( i == 2 ) exit( 1 );
+ }
+
+ if( trywarp )
+ {
+// fprintf( stderr, "wm = %f\n", wm );
+// fprintf( stderr, "warpn = %d\n", warpn );
+ free( wmrecords );
+ free( prevwmrecords );
+ free( warpi );
+ free( warpj );
+ free( prevwarpi );
+ free( prevwarpj );
+ }
+
+
+#if OUTGAP0TRY
+ if( !outgap )
+ {
+ for( j=1; j<lgth2+1; j++ )
+ currentw[j] -= offset * ( lgth2 - j ) / 2.0;
+ for( i=1; i<lgth1+1; i++ )
+ lastverticalw[i] -= offset * ( lgth1 - i / 2.0);
+ }
+#endif
+
+ /*
+ fprintf( stderr, "\n" );
+ for( i=0; i<icyc; i++ ) fprintf( stderr,"%s\n", seq1[i] );
+ fprintf( stderr, "#####\n" );
+ for( j=0; j<jcyc; j++ ) fprintf( stderr,"%s\n", seq2[j] );
+ fprintf( stderr, "====>" );
+ for( i=0; i<icyc; i++ ) strcpy( mseq1[i], seq1[i] );
+ for( j=0; j<jcyc; j++ ) strcpy( mseq2[j], seq2[j] );
+ */
+ if( localhom )
+ {
+ Atracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, warpis, warpjs, warpbase );
+ }
+ else
+ Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, tailgp, warpis, warpjs, warpbase );
+
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
+
+// fprintf( stderr, "### impmatch = %f\n", *impmatch );
+
+ resultlen = strlen( mseq1[0] );
+ if( alloclen < resultlen || resultlen > N )
+ {
+ fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
+ ErrorExit( "LENGTH OVER!\n" );
+ }
+
+
+ for( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );
+ for( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );
+#if 0
+ fprintf( stderr, "\n" );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, "%s\n", mseq1[i] );
+ fprintf( stderr, "#####\n" );
+ for( j=0; j<jcyc; j++ ) fprintf( stderr, "%s\n", mseq2[j] );
+#endif
+
+// reporterr( "clearing\n" );
+ cleargaplens( gaplens );
+
+#if USEGAPLENMTX
+/* maikai free */
+ reporterr( "Freeing!\n" );
+ for( i=0; i<lgth1+1; i++ )
+ {
+ for( j=0; j<lgth2+1; j++ )
+ {
+ if( gaplen1mtx[i][j] ) FreeGaplenMtx( gaplen1mtx[i][j], 0 );
+ gaplen1mtx[i][j] = NULL;
+ }
+ free( gaplen1mtx[i] );
+ gaplen1mtx[i] = NULL;
+ }
+ free( gaplen1mtx );
+ gaplen1mtx = NULL;
+
+ for( i=0; i<lgth1+1; i++ )
+ {
+ for( j=0; j<lgth2+1; j++ )
+ {
+ if( gaplen2mtx[i][j] ) FreeGaplenMtx( gaplen2mtx[i][j], 0 );
+ gaplen2mtx[i][j] = NULL;
+ }
+ free( gaplen2mtx[i] );
+ gaplen2mtx[i] = NULL;
+ }
+ free( gaplen2mtx );
+ gaplen2mtx = NULL;
+#endif
+
+
+#if USEGAPLENHALF
+ for( i=0; i<lgth1+1; i++ )
+ {
+ for( j=0; j<lgth2+1; j++ )
+ {
+ if( gaplen1half[i][j] ) FreeGaplenMtx( gaplen1half[i][j], 0 );
+ gaplen1half[i][j] = NULL;
+ }
+ free( gaplen1half[i] );
+ gaplen1half[i] = NULL;
+ }
+ free( gaplen1half );
+ gaplen1half = NULL;
+
+ for( i=0; i<lgth1+1; i++ )
+ {
+ for( j=0; j<lgth2+1; j++ )
+ {
+ if( gaplen2half[i][j] ) FreeGaplenMtx( gaplen2half[i][j], 0 );
+ gaplen2half[i][j] = NULL;
+ }
+ free( gaplen2half[i] );
+ gaplen2half[i] = NULL;
+ }
+ free( gaplen2half );
+ gaplen2half = NULL;
+#endif
+/* maikai free */
+
+
+#if WMCHECK
+ fprintf( stderr, "wm = %f\n", wm - *impmatch);
+ fprintf( stderr, "*impmatch = %f\n", *impmatch);
+
+ int kenzan = 0;
+ for( i=0; i<icyc; i++ ) for( j=0; j<jcyc; j++ )
+ {
+ kenzan += pairgapcount( mseq1[i], mseq2[j] );
+ }
+
+
+ reporterr( "kenzan = %d -> %f\n", kenzan, (double)kenzan /( icyc*jcyc ) );
+
+ double pairscore, nogappairscore;
+ char **pseq;
+ pseq = AllocateCharMtx( 2, strlen( seq1[0] ) + 1 );
+ pairscore = nogappairscore = 0.0;
+ for( i=0; i<icyc; i++ ) for( j=0; j<jcyc; j++ )
+ {
+ strcpy( pseq[0], seq1[i] );
+ strcpy( pseq[1], seq2[j] );
+ commongappick( 2, pseq );
+ pairscore += eff1[i] * eff2[j] * naivepairscore11_dynmtx( n_dynamicmtx, pseq[0], pseq[1], penalty );
+ nogappairscore += eff1[i] * eff2[j] * naivepairscore11_dynmtx( n_dynamicmtx, pseq[0], pseq[1], 0 );
+ }
+
+ FreeCharMtx( pseq );
+ reporterr( "pairscore = %f\n", (double)pairscore );
+ reporterr( "pairscore-nogappairscore = %f\n", (double)(pairscore-nogappairscore) );
+ reporterr( "pairscore-nogappairscore / penalty = %f\n", (double)(pairscore-nogappairscore)/(double)(fpenalty) );
+ reporterr( "diff = %f\n\n", (pairscore - wm + *impmatch ) / fpenalty );
+
+#if 1
+ if( ( !trywarp && abs( pairscore - wm +*impmatch ) > 0.01 ) || PFACERROR )
+// if( abs( pairscore - wm +*impmatch ) > 0.01 )
+#else
+ if( abs( pairscore - wm +*impmatch ) > 0.01 )
+#endif
+// if( abs( pairscore - wm +*impmatch ) > 0.01 )
+ {
+ for( i=0; i<icyc; i++ )
+ printf( ">group1\n%s\n", seq1[i] );
+ for( j=0; j<jcyc; j++ )
+ printf( ">group2\n%s\n", seq2[j] );
+ exit( 1 );
+ }
+#else
+ reporterr( "\n" );
+#endif
+
+#if 0
+// if( strlen( seq1[0] ) - lgth1 > 100 && icyc > 1 || strlen( seq2[0] ) - lgth2 > 100 & jcyc > 1 )
+ if( strstr( seq1[0], "LNDDAT" ) && icyc == 1 || strstr( seq2[0], "LNDDAT" ) && jcyc==1)
+ {
+ for( i=0; i<icyc; i++ )
+ printf( ">group1\n%s\n", seq1[i] );
+ for( j=0; j<jcyc; j++ )
+ printf( ">group2\n%s\n", seq2[j] );
+ exit( 1 );
+ }
+#endif
+
+
+ return( wm );
+}
+
+
+double D__align_ls( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
+{
+	/* Front end to D__align: the group whose gapvariety() value is larger is passed as the first group. */
+	/* Returns whatever D__align returns for the chosen ordering. */
+	int variety1, variety2;
+
+#if 1
+	variety1 = gapvariety( icyc, strlen( seq1[0] ), seq1 );
+	variety2 = gapvariety( jcyc, strlen( seq2[0] ), seq2 );
+#else
+	variety1 = icyc;
+	variety2 = jcyc;
+#endif
+
+	reporterr( " v1,v2 = %d,%d\n", variety1, variety2 );
+
+	if( variety1 < variety2 )
+	{
+		/* Group 2 has the larger value: call D__align with seq, eff, cyc, sgap and egap swapped between the groups. */
+		/* NOTE(review): localhom and impmatch are forwarded unchanged, not swapped -- confirm this is intended. */
+		return D__align( n_dynamicmtx, seq2, seq1, eff2, eff1, jcyc, icyc, alloclen, localhom, impmatch, sgap2, sgap1, egap2, egap1, chudanpt, chudanref, chudanres, headgp, tailgp );
+	}
+
+	/* Ties and variety1 > variety2 keep the caller's ordering. */
+	return D__align( n_dynamicmtx, seq1, seq2, eff1, eff2, icyc, jcyc, alloclen, localhom, impmatch, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
+}
+
+
+
+double D__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int *gapmap1, int *gapmap2 )
+/* Stub: prints an error and exits. Original note: pre-existing gaps are handled incorrectly when the score is computed. */
+{
+	fputs( "Unexpected error. Please contact kazutaka.katoh@aist.go.jp\n", stderr );
+	exit( 1 );
+}
+
+
+double D__align_variousdist( int **which, double ***matrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
+/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
+{
+
+// int k;
+ register int i, j, c;
+ int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
+ int lgth1, lgth2;
+ int resultlen;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+// double fpenalty = (double)penalty;
+#if USE_PENALTY_EX
+ double fpenalty_ex = (double)penalty_ex;
+#endif
+#if 1
+ double *wtmp;
+ int *ijppt;
+ double *mjpt, *prept, *curpt;
+ int *mpjpt;
+#endif
+ static TLS double mi, *m;
+ static TLS int **ijp;
+ static TLS int mpi, *mp;
+ static TLS double *w1, *w2;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
+ static TLS char **mseq1;
+ static TLS char **mseq2;
+ static TLS char **mseq;
+ static TLS double ***cpmx1s;
+ static TLS double ***cpmx2s;
+ static TLS int ***intwork;
+ static TLS double ***doublework;
+ static TLS int orlgth1 = 0, orlgth2 = 0;
+#if USEGAPLENHALF
+ Gaplen ****gaplen1half = NULL; // NULL ga iru to omou.
+ Gaplen ****gaplen2half = NULL; // NULL ga iru to omou.
+#endif
+#if USEGAPLENMTX
+ Gaplen ****gaplen1mtx = NULL; // NULL ga iru to omou.
+ Gaplen ****gaplen2mtx = NULL; // NULL ga iru to omou.
+#endif
+ static TLS Gaplen **gaplen1 = NULL; // NULL ga iru to omou.
+ static TLS Gaplen **gaplen2 = NULL; // NULL ga iru to omou.
+ static TLS Gaplen ***gaplen1jprev = NULL;
+ static TLS Gaplen ***gaplen2jprev = NULL;
+ static TLS Gaplen ***gaplen1jcurr = NULL;
+ static TLS Gaplen ***gaplen2jcurr = NULL;
+ static TLS Gaplen ***gaplen1icurr = NULL;
+ static TLS Gaplen ***gaplen2icurr = NULL;
+ static TLS Gaplen ***gaplen1jbestkamo = NULL;
+ static TLS Gaplen ***gaplen2jbestkamo = NULL;
+ static TLS Gaplen ***gaplen1ibestkamo = NULL;
+ static TLS Gaplen ***gaplen2ibestkamo = NULL;
+ static TLS Gaplen ***gaplen1jbest = NULL;
+ static TLS Gaplen ***gaplen2jbest = NULL;
+ double fpenalty = (double)penalty;
+ double fpenalty_shift = (double)penalty_shift;
+ static TLS Gaplen ****gaplens = NULL;
+
+ Gaplen ***gaplentmp = NULL;
+ int *warpis = NULL;
+ int *warpjs = NULL;
+ int *warpi = NULL;
+ int *warpj = NULL;
+ int *prevwarpi = NULL;
+ int *prevwarpj = NULL;
+ double *wmrecords = NULL;
+ double *prevwmrecords = NULL;
+ int warpn = 0;
+ int warpbase;
+ double curm = 0.0;
+ double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+ int *warpipt, *warpjpt;
+ int k;
+ double pfac, pfactmp;
+ int newgaplen;
+ int **masklist1 = NULL, **masklist2 = NULL;
+ int *nmask;
+
+// for( i=0; i<icyc; i++ ) fprintf( stderr, "%s, %f\n", seq1[i], eff1[i] );
+// for( i=0; i<jcyc; i++ ) fprintf( stderr, "%s, %f\n", seq2[i], eff2[i] );
+
+// reporterr( "\nsizeof(gaplen) = %d\n", sizeof( Gaplen ) );
+// reporterr( "\nsizeof(int) = %d\n", sizeof( int ) );
+// reporterr( "\nsizeof(double) = %d\n", sizeof( double ) );
+// reporterr( "\nsizeof(double*) = %d\n", sizeof( double * ) );
+
+
+ if( seq1 == NULL )
+ {
+ if( orlgth1 )
+ {
+// fprintf( stderr, "## Freeing local arrays in D__align\n" );
+ orlgth1 = 0;
+ orlgth2 = 0;
+
+ imp_match_init_strictD( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL );
+
+ free( mseq1 );
+ free( mseq2 );
+ FreeFloatVec( w1 );
+ FreeFloatVec( w2 );
+ FreeFloatVec( match );
+ FreeFloatVec( initverticalw );
+ FreeFloatVec( lastverticalw );
+
+ FreeFloatVec( m );
+ FreeIntVec( mp );
+
+ FreeCharMtx( mseq );
+
+ FreeFloatCub( cpmx1s );
+ FreeFloatCub( cpmx2s );
+
+ FreeFloatCub( doublework );
+ FreeIntCub( intwork );
+
+
+
+ free( gaplens );
+ if( gaplen1ibestkamo ) FreeGaplenCub( gaplen1ibestkamo ); gaplen1ibestkamo = NULL;
+ if( gaplen2ibestkamo ) FreeGaplenCub( gaplen2ibestkamo ); gaplen2ibestkamo = NULL;
+ if( gaplen1icurr ) FreeGaplenCub( gaplen1icurr ); gaplen1icurr = NULL;
+ if( gaplen2icurr ) FreeGaplenCub( gaplen2icurr ); gaplen2icurr = NULL;
+
+ if( gaplen1jprev ) FreeGaplenCub( gaplen1jprev ); gaplen1jprev = NULL;
+ if( gaplen2jprev ) FreeGaplenCub( gaplen2jprev ); gaplen2jprev = NULL;
+ if( gaplen1jcurr ) FreeGaplenCub( gaplen1jcurr ); gaplen1jcurr = NULL;
+ if( gaplen2jcurr ) FreeGaplenCub( gaplen2jcurr ); gaplen2jcurr = NULL;
+ if( gaplen1jbestkamo ) FreeGaplenCub( gaplen1jbestkamo ); gaplen1jbestkamo = NULL;
+ if( gaplen2jbestkamo ) FreeGaplenCub( gaplen2jbestkamo ); gaplen2jbestkamo = NULL;
+ if( gaplen1jbest ) FreeGaplenCub( gaplen1jbest ); gaplen1jbest = NULL;
+ if( gaplen2jbest ) FreeGaplenCub( gaplen2jbest ); gaplen2jbest = NULL;
+ if( gaplen1 ) FreeGaplenMtx( gaplen1, 1 ); gaplen1 = NULL;
+ if( gaplen2 ) FreeGaplenMtx( gaplen2, 1 ); gaplen2 = NULL;
+
+ }
+ else
+ {
+// fprintf( stderr, "## Not allocated\n" );
+ }
+ return( 0.0 );
+ }
+
+
+#if SLOW
+ nmask = calloc( maxdistclass, sizeof( int ) );
+#else
+ masklist1 = AllocateIntMtx( maxdistclass, 0 );
+ masklist2 = AllocateIntMtx( maxdistclass, 0 );
+ nmask = calloc( maxdistclass, sizeof( int ) );
+
+ for( c=0; c<maxdistclass; c++ )
+ {
+ for( i=0; i<icyc; i++ ) for( j=0; j<jcyc; j++ )
+ {
+ if( eff1s[c][i] * eff2s[c][j] != 0.0 )
+ {
+// reporterr( "%d-%d, c=%d\n", i, j, c );
+ if( c != which[i][j] )
+ {
+ masklist1[c] = realloc( masklist1[c], sizeof( int ) * nmask[c]+1 );
+ masklist2[c] = realloc( masklist2[c], sizeof( int ) * nmask[c]+1 );
+
+ masklist1[c][nmask[c]] = i;
+ masklist2[c][nmask[c]] = j;
+ nmask[c]++;
+ }
+ }
+ }
+ }
+ for( c=0; c<maxdistclass; c++ ) if( nmask[c] ) break;
+ if( c<maxdistclass ) reporterr( "Found a complex grouping. This step may be a bit slow.\n" );
+#endif
+
+
+ lgth1 = strlen( seq1[0] );
+ lgth2 = strlen( seq2[0] );
+
+
+ reporterr( "%d x %d sequences, len=%d, %d\n", icyc, jcyc, lgth1, lgth2 );
+
+
+#if 0
+ if( lgth1 == 0 || lgth2 == 0 )
+ {
+ fprintf( stderr, "WARNING (Aalignmm): lgth1=%d, lgth2=%d\n", lgth1, lgth2 );
+ }
+#endif
+ if( lgth1 == 0 && lgth2 == 0 )
+ return( 0.0 );
+
+ if( lgth1 == 0 )
+ {
+ for( i=0; i<icyc; i++ )
+ {
+ j = lgth2;
+ seq1[i][j] = 0;
+ while( j ) seq1[i][--j] = *newgapstr;
+// fprintf( stderr, "seq1[i] = %s\n", seq1[i] );
+ }
+ return( 0.0 );
+ }
+
+ if( lgth2 == 0 )
+ {
+ for( i=0; i<jcyc; i++ )
+ {
+ j = lgth1;
+ seq2[i][j] = 0;
+ while( j ) seq2[i][--j] = *newgapstr;
+// fprintf( stderr, "seq2[i] = %s\n", seq2[i] );
+ }
+ return( 0.0 );
+ }
+
+ warpbase = lgth1 + lgth2;
+ warpis = NULL;
+ warpjs = NULL;
+ warpn = 0;
+
+
+
+ if( trywarp )
+ {
+// reporterr( "Not supported yet!\n" );
+// exit( 1 );
+// fprintf( stderr, "IN D__align, penalty_shift = %d\n", penalty_shift );
+ if( headgp == 0 || tailgp == 0 )
+ {
+ fprintf( stderr, "At present, headgp and tailgp must be 1 to allow shift.\n" );
+ exit( 1 );
+ }
+ wmrecords = AllocateFloatVec( lgth2+1 );
+ warpi = AllocateIntVec( lgth2+1 );
+ warpj = AllocateIntVec( lgth2+1 );
+ prevwmrecords = AllocateFloatVec( lgth2+1 );
+ prevwarpi = AllocateIntVec( lgth2+1 );
+ prevwarpj = AllocateIntVec( lgth2+1 );
+ for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
+ }
+
+
+#if 0
+ fprintf( stderr, "#### eff in SA+++align\n" );
+ fprintf( stderr, "#### seq1[0] = %s\n", seq1[0] );
+ fprintf( stderr, "#### strlen( seq1[0] ) = %d\n", strlen( seq1[0] ) );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, "eff1[%d] = %f\n", i, eff1[i] );
+ fprintf( stderr, "#### seq2[0] = %s\n", seq2[0] );
+ fprintf( stderr, "#### strlen( seq2[0] ) = %d\n", strlen( seq2[0] ) );
+ for( i=0; i<jcyc; i++ ) fprintf( stderr, "eff2[%d] = %f\n", i, eff2[i] );
+#endif
+ if( orlgth1 == 0 )
+ {
+ mseq1 = AllocateCharMtx( njob, 0 );
+ mseq2 = AllocateCharMtx( njob, 0 );
+ }
+
+ if( lgth1 > orlgth1 || lgth2 > orlgth2 )
+ {
+ int ll1, ll2;
+
+
+ if( orlgth1 > 0 && orlgth2 > 0 )
+ {
+ FreeFloatVec( w1 );
+ FreeFloatVec( w2 );
+ FreeFloatVec( match );
+ FreeFloatVec( initverticalw );
+ FreeFloatVec( lastverticalw );
+
+ FreeFloatVec( m );
+ FreeIntVec( mp );
+
+ FreeCharMtx( mseq );
+
+ FreeFloatCub( cpmx1s );
+ FreeFloatCub( cpmx2s );
+
+ FreeFloatCub( doublework );
+ FreeIntCub( intwork );
+
+
+ free( gaplens );
+
+ if( gaplen1ibestkamo ) FreeGaplenCub( gaplen1ibestkamo ); gaplen1ibestkamo = NULL;
+ if( gaplen2ibestkamo ) FreeGaplenCub( gaplen2ibestkamo ); gaplen2ibestkamo = NULL;
+ if( gaplen1icurr ) FreeGaplenCub( gaplen1icurr ); gaplen1icurr = NULL;
+ if( gaplen2icurr ) FreeGaplenCub( gaplen2icurr ); gaplen2icurr = NULL;
+
+ if( gaplen1jcurr ) FreeGaplenCub( gaplen1jcurr ); gaplen1jcurr = NULL;
+ if( gaplen1jprev ) FreeGaplenCub( gaplen1jprev ); gaplen1jprev = NULL;
+ if( gaplen2jcurr ) FreeGaplenCub( gaplen2jcurr ); gaplen2jcurr = NULL;
+ if( gaplen2jprev ) FreeGaplenCub( gaplen2jprev ); gaplen2jprev = NULL;
+ if( gaplen1jbestkamo ) FreeGaplenCub( gaplen1jbestkamo ); gaplen1jbestkamo = NULL;
+ if( gaplen2jbestkamo ) FreeGaplenCub( gaplen2jbestkamo ); gaplen2jbestkamo = NULL;
+ if( gaplen1jbest ) FreeGaplenCub( gaplen1jbest ); gaplen1jbest = NULL;
+ if( gaplen2jbest ) FreeGaplenCub( gaplen2jbest ); gaplen2jbest = NULL;
+ if( gaplen1 ) FreeGaplenMtx( gaplen1, 1 ); gaplen1 = NULL;
+ if( gaplen2 ) FreeGaplenMtx( gaplen2, 1 ); gaplen2 = NULL;
+
+ }
+
+ ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
+ ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
+
+#if DEBUG
+ fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
+#endif
+
+ w1 = AllocateFloatVec( ll2+2 );
+ w2 = AllocateFloatVec( ll2+2 );
+ match = AllocateFloatVec( ll2+2 );
+
+ initverticalw = AllocateFloatVec( ll1+2 );
+ lastverticalw = AllocateFloatVec( ll1+2 );
+
+ m = AllocateFloatVec( ll2+2 );
+ mp = AllocateIntVec( ll2+2 );
+
+ mseq = AllocateCharMtx( njob, ll1+ll2 );
+
+ cpmx1s = AllocateFloatCub( maxdistclass, nalphabets, ll1+2 );
+ cpmx2s = AllocateFloatCub( maxdistclass, nalphabets, ll2+2 );
+
+ doublework = AllocateFloatCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets+1 );
+
+#if DEBUG
+ fprintf( stderr, "succeeded\n" );
+#endif
+
+ orlgth1 = ll1 - 100;
+ orlgth2 = ll2 - 100;
+
+
+// reporterr( "Allocating gaplen1 and gaplen2\n" );
+ gaplen1 = (Gaplen ** )calloc( ll1+2, sizeof( Gaplen * ) );
+ gaplen1[ll1+1] = (Gaplen *)1;
+ gaplen2 = (Gaplen ** )calloc( ll2+2, sizeof( Gaplen * ) );
+ gaplen2[ll2+1] = (Gaplen *)1;
+
+
+// reporterr( "Allocating gaplen*\n" );
+ gaplen1ibestkamo = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) );
+ gaplen2ibestkamo = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) );
+ gaplen1icurr = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) );
+ gaplen2icurr = (Gaplen ***)calloc( (ll1+2), sizeof( Gaplen **) );
+ gaplen1jbestkamo = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen2jbestkamo = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen1jbest = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen2jbest = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen1jcurr = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen2jcurr = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen1jprev = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+ gaplen2jprev = (Gaplen ***)calloc( (ll2+2), sizeof( Gaplen **) );
+
+ gaplens = calloc( sizeof( Gaplen ***), 12 );
+ gaplens[0] = gaplen1ibestkamo;
+ gaplens[1] = gaplen2ibestkamo;
+ gaplens[2] = gaplen1icurr;
+ gaplens[3] = gaplen2icurr;
+ gaplens[4] = gaplen1jbestkamo;
+ gaplens[5] = gaplen2jbestkamo;
+ gaplens[6] = gaplen1jbest;
+ gaplens[7] = gaplen2jbest;
+ gaplens[8] = gaplen1jcurr;
+ gaplens[9] = gaplen2jcurr;
+ gaplens[10] = gaplen1jprev;
+ gaplens[11] = gaplen2jprev;
+// reporterr( "Allocation end\n" );
+ }
+
+ {
+ int ll1 = lgth1;
+ int ll2 = lgth2;
+
+// reporterr( "Allocating gaplen*i\n" );
+ for(i=0; i<ll1+1; i++ )
+ {
+ gaplen1ibestkamo[i] = (Gaplen **)calloc( ll1+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1-i; k++ ) gaplen1ibestkamo[i][k] = NULL;
+ gaplen1ibestkamo[i][k] = (Gaplen *)1;
+
+ gaplen2ibestkamo[i] = (Gaplen **)calloc( ll2+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1; k++ ) gaplen2ibestkamo[i][k] = NULL;
+ gaplen2ibestkamo[i][k] = (Gaplen *)1;
+
+ gaplen1icurr[i] = (Gaplen **)calloc( ll1+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1-i; k++ ) gaplen1icurr[i][k] = NULL;
+ gaplen1icurr[i][k] = (Gaplen *)1;
+
+ gaplen2icurr[i] = (Gaplen **)calloc( ll2+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1; k++ ) gaplen2icurr[i][k] = NULL;
+ gaplen2icurr[i][k] = (Gaplen *)1;
+ }
+ gaplen1ibestkamo[ll1+1] = NULL;
+ gaplen2ibestkamo[ll1+1] = NULL;
+ gaplen1icurr[ll1+1] = NULL;
+ gaplen2icurr[ll1+1] = NULL;
+
+// reporterr( "Allocating gaplen*j\n" );
+ for(i=0; i<ll2+1; i++ )
+ {
+ gaplen1jbestkamo[i] = (Gaplen **)calloc( ll1+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1; k++ ) gaplen1jbestkamo[i][k] = NULL;
+ gaplen1jbestkamo[i][k] = (Gaplen *)1;
+
+ gaplen2jbestkamo[i] = (Gaplen **)calloc( ll2+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1-i; k++ ) gaplen2jbestkamo[i][k] = NULL;
+ gaplen2jbestkamo[i][k] = (Gaplen *)1;
+
+ gaplen1jbest[i] = (Gaplen **)calloc( ll1+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1; k++ ) gaplen1jbest[i][k] = NULL;
+ gaplen1jbest[i][k] = (Gaplen *)1;
+
+ gaplen2jbest[i] = (Gaplen **)calloc( ll2+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1-i; k++ ) gaplen2jbest[i][k] = NULL;
+ gaplen2jbest[i][k] = (Gaplen *)1;
+
+ gaplen1jcurr[i] = (Gaplen **)calloc( ll1+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1; k++ ) gaplen1jcurr[i][k] = NULL;
+ gaplen1jcurr[i][k] = (Gaplen *)1;
+
+ gaplen2jcurr[i] = (Gaplen **)calloc( ll2+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1-i; k++ ) gaplen2jcurr[i][k] = NULL;
+ gaplen2jcurr[i][k] = (Gaplen *)1;
+
+ gaplen1jprev[i] = (Gaplen **)calloc( ll1+2, sizeof( Gaplen * ) );
+ for( k=0; k<ll1+1; k++ ) gaplen1jprev[i][k] = NULL;
+ gaplen1jprev[i][k] = (Gaplen *)1;
+
+ gaplen2jprev[i] = (Gaplen **)calloc( ll2+2-i, sizeof( Gaplen * ) );
+ for( k=0; k<ll2+1-i; k++ ) gaplen2jprev[i][k] = NULL;
+ gaplen2jprev[i][k] = (Gaplen *)1;
+
+ }
+ gaplen1jbestkamo[ll2+1] = NULL;
+ gaplen2jbestkamo[ll2+1] = NULL;
+ gaplen1jbest[ll2+1] = NULL;
+ gaplen2jbest[ll2+1] = NULL;
+ gaplen1jcurr[ll2+1] = NULL;
+ gaplen2jcurr[ll2+1] = NULL;
+ gaplen1jprev[ll2+1] = NULL;
+ gaplen2jprev[ll2+1] = NULL;
+ }
+
+
+#if USEGAPLENMTX
+/* maikai allocate */
+
+ reporterr( "Allocating gaplenmtx1\n" );
+ gaplen1mtx = (Gaplen ****)calloc( (lgth1+2), sizeof( Gaplen ***) );
+ for(i=0; i<lgth1+1; i++ ) gaplen1mtx[i] = (Gaplen ***)calloc( lgth2+2, sizeof( Gaplen ** ) );
+ for(i=0; i<lgth1+1; i++ )
+ {
+ for(j=0; j<lgth2+1; j++ )
+ {
+ gaplen1mtx[i][j] = (Gaplen **)calloc( lgth1+2, sizeof( Gaplen * ) );
+ for( k=0; k<lgth1+1; k++ ) gaplen1mtx[i][j][k] = NULL;
+ gaplen1mtx[i][j][k] = (Gaplen *)1;
+ }
+ gaplen1mtx[i][j] = NULL;
+ }
+ gaplen1mtx[i] = NULL;
+
+ reporterr( "Allocating gaplenmtx2\n" );
+ gaplen2mtx = (Gaplen ****)calloc( (lgth1+2), sizeof( Gaplen ***) );
+ for(i=0; i<lgth1+1; i++ ) gaplen2mtx[i] = (Gaplen ***)calloc( lgth2+2, sizeof( Gaplen ** ) );
+ for(i=0; i<lgth1+1; i++ )
+ {
+ for(j=0; j<lgth2+1; j++ )
+ {
+ gaplen2mtx[i][j] = (Gaplen **)calloc( lgth2+2, sizeof( Gaplen * ) );
+ for( k=0; k<lgth2+1; k++ ) gaplen2mtx[i][j][k] = NULL;
+ gaplen2mtx[i][j][k] = (Gaplen *)1;
+ }
+ gaplen2mtx[i][j] = NULL;
+ }
+ gaplen2mtx[i] = NULL;
+
+#endif
+
+#if USEGAPLENHALF
+ reporterr( "Allocating gaplenhalf1\n" );
+ gaplen1half = (Gaplen ****)calloc( (lgth1+2), sizeof( Gaplen ***) );
+ for(i=0; i<lgth1+1; i++ ) gaplen1half[i] = (Gaplen ***)calloc( lgth2+2, sizeof( Gaplen ** ) );
+ for(i=0; i<lgth1+1; i++ )
+ {
+ for(j=0; j<lgth2+1; j++ )
+ {
+ gaplen1half[i][j] = (Gaplen **)calloc( lgth1+2 - i, sizeof( Gaplen * ) );
+ for( k=0; k<lgth1+1-i; k++ ) gaplen1half[i][j][k] = NULL;
+ gaplen1half[i][j][k] = (Gaplen *)1;
+ }
+ gaplen1half[i][j] = NULL;
+ }
+ gaplen1half[i] = NULL;
+
+ reporterr( "Allocating gaplenhalf2\n" );
+ gaplen2half = (Gaplen ****)calloc( (lgth1+2), sizeof( Gaplen ***) );
+ for(i=0; i<lgth1+1; i++ ) gaplen2half[i] = (Gaplen ***)calloc( lgth2+2, sizeof( Gaplen ** ) );
+ for(i=0; i<lgth1+1; i++ )
+ {
+ for(j=0; j<lgth2+1; j++ )
+ {
+ gaplen2half[i][j] = (Gaplen **)calloc( lgth2+2 - j, sizeof( Gaplen * ) );
+ for( k=0; k<lgth2+1-j; k++ ) gaplen2half[i][j][k] = NULL;
+ gaplen2half[i][j][k] = (Gaplen *)1;
+ }
+ gaplen2half[i][j] = NULL;
+ }
+ gaplen2half[i] = NULL;
+#endif
+
+
+/* maikai allocate */
+
+
+ for( i=0; i<icyc; i++ )
+ {
+ mseq1[i] = mseq[i];
+ seq1[i][lgth1] = 0;
+ }
+ for( j=0; j<jcyc; j++ )
+ {
+ mseq2[j] = mseq[icyc+j];
+ seq2[j][lgth2] = 0;
+ }
+
+
+ if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
+ {
+ int ll1, ll2;
+
+ if( commonAlloc1 && commonAlloc2 )
+ {
+ FreeIntMtx( commonIP );
+ }
+
+ ll1 = MAX( orlgth1, commonAlloc1 );
+ ll2 = MAX( orlgth2, commonAlloc2 );
+
+#if DEBUG
+ fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
+#endif
+
+ commonIP = AllocateIntMtx( ll1+10, ll2+10 );
+
+#if DEBUG
+ fprintf( stderr, "succeeded\n\n" );
+#endif
+
+ commonAlloc1 = ll1;
+ commonAlloc2 = ll2;
+ }
+ ijp = commonIP;
+
+#if 0
+ {
+ double t = 0.0;
+ for( i=0; i<icyc; i++ )
+ t += eff1[i];
+ fprintf( stderr, "## totaleff = %f\n", t );
+ }
+#endif
+
+
+#if SLOW
+#else
+// cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc );
+// cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc );
+ for( c=0; c<maxdistclass; c++ )
+ {
+ cpmx_calc_new( seq1, cpmx1s[c], eff1s[c], lgth1, icyc );
+ cpmx_calc_new( seq2, cpmx2s[c], eff2s[c], lgth2, jcyc );
+ }
+#endif
+
+
+// reporterr( "Counting gaplen\n" );
+ gaplencount( icyc, lgth1, gaplen1, seq1, eff1 );
+ gaplencount( jcyc, lgth2, gaplen2, seq2, eff2 );
+#if DEBUG
+ reporterr( "group1 = \n" );
+ showgaplen( gaplen1, lgth1 );
+ reporterr( "group2 = \n" );
+ showgaplen( gaplen2, lgth2 );
+#endif
+// reporterr( "done.\n" );
+
+
+ for( i=0; i<lgth1+1; i++ ) for( j=0; j<lgth2+1; j++ )
+ {
+#if USEGAPLENMTX
+// duplicategaplen( gaplen1mtx[i][j], gaplen1, lgth1 );
+// duplicategaplen( gaplen2mtx[i][j], gaplen2, lgth2 );
+
+// duplicategaplenpartly( gaplen2mtx[i][j], gaplen2, j-0, lgth2 ); // anzen
+// duplicategaplenpartly( gaplen1mtx[i][j], gaplen1, i-0, lgth1 ); // anzen
+ duplicategaplenpartly( gaplen1mtx[i][j], gaplen1, i-0, i ); // iranaikamo
+ duplicategaplenpartly( gaplen2mtx[i][j], gaplen2, j-0, j ); // iranaikamo
+#endif
+#if USEGAPLENHALF
+
+// duplicategaplenpartly( gaplen1half[i][j], gaplen1+i, 0, lgth1-i ); // KOKO de setsuyaku dekiru to omou.
+// duplicategaplenpartly( gaplen2half[i][j], gaplen2+j, 0, lgth2-j ); // originally, j-1, lgth2
+ duplicategaplenpartly( gaplen1half[i][j], gaplen1+i, 0, 0 ); // test
+ duplicategaplenpartly( gaplen2half[i][j], gaplen2+j, 0, 0 ); // test
+#endif
+ }
+
+
+#if USEGAPLENMTX
+ reporterr( "Duplicating gaplen*mtx\n" );
+ for( i=0; i<lgth1+1; i++ )
+ {
+// addnewgaplen( gaplen1mtx[i][0], gaplen1, gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2mtx[i][0], gaplen2, gaplen2, lgth2, 0, i );
+// duplicategaplenpartly( gaplen1mtx[i][0], gaplen1, 0, lgth1 );
+// duplicategaplenpartly( gaplen2mtx[i][0], gaplen2, 0, lgth2 );
+ copygaplenrestricted( gaplen2mtx[i][0], gaplen2, lgth2, 0, i, 0, 0 );
+ }
+#endif
+#if USEGAPLENHALF
+ reporterr( "Duplicating gaplen*mtx\n" );
+ for( i=0; i<lgth1+1; i++ )
+ {
+ copygaplenrestricted( gaplen2half[i][0], gaplen2, lgth2, 0, i, 0, 0 );
+ }
+#endif
+
+
+
+ for( i=0; i<1; i++ )
+ {
+// duplicategaplencompactx( gaplen1icurr[i], gaplen1, lgth1, i-0, lgth1 ); //originally, 0, lgth1
+//
+// duplicategaplencompactx( gaplen1icurr[i], gaplen1+i, lgth1-i, 0, lgth1-i ); // half
+ duplicategaplencompactx( gaplen1icurr[i], gaplen1+i, lgth1-i, 0, 1 ); // 0, 1 hitsuyou
+
+
+// duplicategaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, lgth2 ); // ichiou zenbu
+ duplicategaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, 0 );
+
+
+ copygaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error?
+
+
+// duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1, lgth1, 0, 1 );
+// duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 1 ); // half
+ duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 0 ); // half
+// duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, lgth2 );
+ duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, 0 );
+// copygaplenrestricted( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error?
+// copygaplenrestricted_zurasu( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, lgth2, 0, lgth2 ); // -> zurasu -> error?
+ copygaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // -> zurasu -> error?
+ }
+
+// reporterr( "Duplicating gaplen*j*curr \n" );
+// int nduplicated = 0;
+ for( j=0; j<lgth2+1; j++ )
+// for( j=0; j<1; j++ )
+ {
+#if USEGAPLENMTX
+// addnewgaplen( gaplen1mtx[0][j], gaplen1, gaplen1, lgth1, 0, j );
+// addnewgaplen( gaplen2mtx[0][j], gaplen2, gaplen2, lgth2, -1, 0 );
+// duplicategaplenpartly( gaplen1mtx[0][j], gaplen1, 0, lgth1 );
+// duplicategaplenpartly( gaplen2mtx[0][j], gaplen2, 0, lgth2 );
+ copygaplenrestricted( gaplen1mtx[0][j], gaplen1, lgth1, 0, j, 0, 0 );
+#endif
+
+#if USEGAPLENHALF
+ copygaplenrestricted( gaplen1half[0][j], gaplen1, lgth1, 0, j, 0, 0 );
+#endif
+// reporterr( "1jcurr?\n" );
+// duplicategaplencompactx( gaplen1jcurr[j], gaplen1, lgth1, 0, lgth1 ); // test
+ duplicategaplencompactx( gaplen1jcurr[j], gaplen1, lgth1, 0, 0 ); // dame?
+// reporterr( "done\n" );
+// duplicategaplencompactx( gaplen1jcurr[j], gaplen1, lgth1, 0, 0 ); //test
+
+// duplicategaplencompactx( gaplen2jcurr[j], gaplen2, lgth2, j-0, lgth2 ); // full
+// duplicategaplencompactx( gaplen2jcurr[j], gaplen2+j, lgth2-j, 0, lgth2-j ); //half! KOKO????
+//reporterr( "starting suspicious duplication\n" );
+ duplicategaplencompactx( gaplen2jcurr[j], gaplen2+j, lgth2-j, 0, 0 ); //half!
+//reporterr( "starting suspicious copy\n" );
+ copygaplencompactx( gaplen1jcurr[j], gaplen1, lgth1, 0, j, 0, 0 ); // TEST
+//reporterr( "finished\n" );
+
+// reporterr( "Checking gaplen1jcurr[%d]\n", j );
+// checkgaplen( gaplen1jcurr[j], 100 );
+// reporterr( "Checking gaplen2jcurr[%d]\n", j );
+// checkgaplen( gaplen2jcurr[j], 100 );
+ }
+
+// reporterr( "nduplicated (corrected) = %d\n", nduplicated );
+
+// reporterr( "Duplicating gaplen*j*prev \n\n" );
+ for( j=0; j<lgth2+1; j++ ) // allocate nominotame, atode uwagaki
+ {
+// duplicategaplencompactx( gaplen1jprev[j], gaplen1, lgth1, 0, lgth1 );
+ duplicategaplencompactx( gaplen1jprev[j], gaplen1, lgth1, 0, 0 ); // TEST
+// duplicategaplencompactx( gaplen2jprev[j], gaplen2, lgth2, j-0, lgth2 ); // originally, 0,lgth2
+// duplicategaplencompactx( gaplen2jprev[j], gaplen2+j, lgth2-j, 0, lgth2-j ); // half
+ duplicategaplencompactx( gaplen2jprev[j], gaplen2+j, lgth2-j, 0, 0 ); // half
+
+
+ copygaplencompactx( gaplen1jprev[j], gaplen1, lgth1, 0, j, 0, 0 ); // wasuretetakamo
+
+
+ }
+
+
+// reporterr( "Duplicating gaplen*j*best \n\n" );
+
+ for( j=0; j<lgth2+1; j++ )
+// for( j=0; j<1; j++ )
+ {
+// duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, lgth1 ); // KOKO
+// duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, 0 ); // test
+// duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, 1 );
+ duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, 0 );
+
+
+// duplicategaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, 1 );
+// duplicategaplencompactx( gaplen2jbestkamo[j], gaplen2, lgth2, j-0, j+1 ); // originally, 0, j+1
+ duplicategaplencompactx( gaplen2jbestkamo[j], gaplen2+j, lgth2-j, 0, 1 ); // half!
+ copygaplencompactx( gaplen1jbestkamo[j], gaplen1, lgth1, 0, j, 0, 0 ); // TEST
+
+// duplicategaplencompactx( gaplen1jbest[j], gaplen1, lgth1, 0, lgth1 ); // KOKO
+ duplicategaplencompactx( gaplen1jbest[j], gaplen1, lgth1, 0, 0 ); // test
+// duplicategaplencompactx( gaplen2jbest[j], gaplen2, lgth2,j-0, j+1 ); // originally, 0,j+1
+ duplicategaplencompactx( gaplen2jbest[j], gaplen2+j, lgth2-j, 0, 1 ); // half!
+ copygaplencompactx( gaplen1jbest[j], gaplen1, lgth1, 0, j, 0, 0 ); // TEST
+
+
+ }
+
+// reporterr( "Duplication end\n" );
+
+
+#if 0
+ reporterr( "Checking gaplen1icurr\n" );
+ checkgaplen( gaplen1icurr[0], 100 );
+ reporterr( "Checking gaplen2icurr\n" );
+ checkgaplen( gaplen2icurr[0], 100 );
+#endif
+
+
+
+
+// showgaplen( gaplen1jcurr[50], lgth2 );
+
+ currentw = w1;
+ previousw = w2;
+
+// match_calc( n_dynamicmtx, initverticalw, cpmx2, cpmx1, 0, lgth1, doublework, intwork, 1 );
+#if SLOW
+ match_calc_slow( which, matrices, initverticalw, jcyc, seq2, eff2, icyc, seq1, eff1, 0, lgth1, *doublework, *intwork, 1, 1 );
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "%d - %f\n", i, initverticalw[i] );
+#else
+ fillzero( initverticalw, lgth1 );
+ for( c=0; c<maxdistclass; c++ )
+ {
+// fprintf( stderr, "c=%d matrices[c][W][W] = %f\n", c, matrices[c][amino_n['W']][amino_n['W']] );
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "seq1[i] = %c, cpmx1s[c][3][%d] = %f\n", seq1[0][i], i, cpmx1s[c][3][i] );
+// for( i=0; i<lgth2; i++ ) fprintf( stderr, "seq2[i] = %c, cpmx2s[c][3][%d] = %f\n", seq2[0][i], i, cpmx2s[c][3][i] );
+ match_calc_add( matrices[c], initverticalw, cpmx2s[c], cpmx1s[c], 0, lgth1, doublework[c], intwork[c], 1 );
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "c=%d, %d - %f\n", c, i, initverticalw[i] );
+
+ if( nmask[c] ) match_calc_del( which, matrices, initverticalw, jcyc, seq2, eff2, icyc, seq1, eff1, 0, lgth1, c, nmask[c], masklist2[c], masklist1[c] );
+ }
+#endif
+// reporterr( "initverticalw = \n" );
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "%d - %f\n", i, initverticalw[i] );
+
+
+ if( localhom )
+ imp_match_out_vead_tate( initverticalw, 0, lgth1 ); // 060306
+
+// match_calc( n_dynamicmtx, currentw, cpmx1, cpmx2, 0, lgth2, doublework, intwork, 1 );
+#if SLOW
+ match_calc_slow( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, 0, lgth2, *doublework, *intwork, 1, 0 );
+// for( i=0; i<lgth2; i++ ) fprintf( stderr, "%d - %f\n", i, currentw[i] );
+// exit( 1 );
+#else
+ fillzero( currentw, lgth2 );
+ for( c=0; c<maxdistclass; c++ )
+ {
+ match_calc_add( matrices[c], currentw, cpmx1s[c], cpmx2s[c], 0, lgth2, doublework[c], intwork[c], 1 );
+ if( nmask[c] ) match_calc_del( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, 0, lgth2, c, nmask[c], masklist1[c], masklist2[c] );
+ }
+#endif
+// reporterr( "currentw = \n" );
+// for( i=0; i<lgth2; i++ ) fprintf( stderr, "%d - %f\n", i, currentw[i] );
+
+
+
+
+
+// exit( 1 );
+
+
+ if( localhom )
+ imp_match_out_vead( currentw, 0, lgth2 ); // 060306
+#if 0 // -> tbfast.c
+ if( localhom )
+ imp_match_calc( n_dynamicmtx, currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 );
+
+#endif
+
+ for( j=1; j<lgth2+1; j++ )
+ {
+ pfac = calcpfac_gap_noidatend( gaplen1, gaplen2, j, 0, j, seq1[0], seq2[0], 0 );
+// reporterr( "computing initial end gap penalty for %c-%c, i=0, j=%d, pfac=%f\n", seq1[0][0], seq2[0][j], j, pfac );
+// reporterr( "%c-%c, i=0, j=%d, currentw[j]=%f -> ", seq1[0][0], seq2[0][j], j, currentw[j] );
+ currentw[j] += fpenalty * pfac; // tekitou
+// reporterr( " %f\n", currentw[j] );
+ }
+ for( i=1; i<lgth1+1; i++ )
+ {
+ pfac = calcpfac_gap_noidatend( gaplen2, gaplen1, i, 0, i, seq2[0], seq1[0], 0 );
+// reporterr( "computing initial end gap penalty for %c-%c, i=%d, j=0, pfac=%f\n", seq1[0][i], seq2[0][0], i, pfac );
+ initverticalw[i] += fpenalty * pfac; // tekitou
+ }
+
+
+
+ for( j=1; j<lgth2+1; ++j )
+ {
+
+
+#if ALGZGAP
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2[j-1]; mp[j] = 0;;
+#else
+ pfac = calcpfac_gapex_noidatend( gaplen2, gaplen1, j, 1, j, seq2[0], seq1[0], 1 );
+#if DEBUG
+ reporterr( "%c-%c, INITIAL jgap extension check, pfac = %f\n\n", seq1[0][j], '=', pfac );
+#endif
+ m[j] = currentw[j-1] + fpenalty * pfac;
+ mp[j] = 0;
+#endif
+ }
+ if( lgth2 == 0 )
+ lastverticalw[0] = 0.0; // Falign kara yobaretatoki kounarukanousei ari
+ else
+ lastverticalw[0] = currentw[lgth2-1];
+
+ if( tailgp ) lasti = lgth1+1; else lasti = lgth1;
+ lastj = lgth2+1;
+
+
+ for( i=1; i<lasti; i++ )
+ {
+// reporterr( "i = %d\n", i );
+
+// reporterr( "err1? i=%d/%d\n", i, lgth1 );
+#ifdef enablemultithread
+// fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+ if( chudanpt && *chudanpt != chudanref )
+ {
+ cleargaplens( gaplens );
+ if( masklist1 ) FreeIntMtx( masklist1 ); masklist1 = NULL;
+ if( masklist2 ) FreeIntMtx( masklist2 ); masklist2 = NULL;
+ if( nmask ) free( nmask ); nmask = NULL;
+// fprintf( stderr, "\n\n## CHUUDAN!!! S\n" );
+ *chudanres = 1;
+ return( -1.0 );
+ }
+#endif
+
+
+ wtmp = previousw;
+ previousw = currentw;
+ currentw = wtmp;
+
+ previousw[0] = initverticalw[i-1];
+
+#if 1
+ gaplentmp = gaplen1jprev;
+ gaplen1jprev = gaplen1jcurr;
+ gaplen1jcurr = gaplentmp;
+
+ gaplentmp = gaplen2jprev;
+ gaplen2jprev = gaplen2jcurr;
+ gaplen2jcurr = gaplentmp;
+
+#if DEBUG
+ reporterr( "Entering a small j loop, i=%d\n", i );
+ for( j=1; j<lgth2+1; j++ )
+ {
+ reporterr( "before j loop, i=%d, gaplen2jcurr[%d] = \n", i, j );
+ showgaplen( gaplen2jcurr[j], 100 );
+ reporterr( "\n" );
+ reporterr( "before j loop, i=%d, gaplen2prev[%d] = \n", i, j );
+ showgaplen( gaplen2jprev[j], 100 );
+ reporterr( "\n" );
+ }
+#endif
+#else
+
+ reporterr( "Entering a small j loop, i=%\n", i );
+ for( j=1; j<lgth2+1; j++ )
+ {
+// addnewgaplen( gaplen1jprev[j], gaplen1jcurr[j], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jprev[j], gaplen2jcurr[j], gaplen2, lgth2, -1, 0 );
+ reporterr( "err1? j=%d/%d\n", j, lgth2 );
+ copygaplencompactx( gaplen1jprev[j-1], gaplen1jcurr[j-1], lgth1, -1, 0, i-1, i-1 ); // TEST
+ reporterr( "err1? j=%d/%d\n", j, lgth2 );
+ copygaplencompactx( gaplen2jprev[j-1], gaplen2jcurr[j-1], lgth2, -1, 0, j-1, j-1 ); // TETS
+#if DEBUG
+ reporterr( "before j loop, i=%d, gaplen2jcurr[%d] = \n", i, j );
+ showgaplen( gaplen2jcurr[j], 100 );
+ reporterr( "\n" );
+ reporterr( "before j loop, i=%d, gaplen2prev[%d] = \n", i, j );
+ showgaplen( gaplen2jprev[j], 100 );
+ reporterr( "\n" );
+#endif
+ }
+#endif
+
+// reporterr( "err2? i=%d/%d\n", i, lgth1 );
+
+// duplicategaplencompactx( gaplen1icurr[i], gaplen1, lgth1, i, i+1 ); //originally 0, i+1
+// reporterr( "gaplen+0=\n");
+// showgaplen( gaplen1, 10 );
+// reporterr( "i=%d, lgth1=%d, lgth1-i=%d, gaplen+i-1=\n", i, lgth1, lgth1-i );
+// showgaplen( gaplen1+i-1, 100 );
+ duplicategaplencompactx( gaplen1icurr[i], gaplen1+i, lgth1-i, 0, 1 ); // half!!
+// duplicategaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, lgth2 ); // KOKO
+ duplicategaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, 0 ); // test
+ copygaplencompactx( gaplen2icurr[i], gaplen2, lgth2, 0, i, 0, 0 ); // IRU? TEST
+
+
+
+// duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1, lgth1, i, i+1 ); //originally 0, i+1
+ duplicategaplencompactx( gaplen1ibestkamo[i], gaplen1+i, lgth1-i, 0, 1 ); //half
+// duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, lgth2 ); // ORIGINALLY, 0, lgth2
+ duplicategaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, 0 ); // ORIGINALLY, 0, lgth2
+// copygaplenrestricted( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, lgth2, 0, 0 ); // IRU? // TEST
+ copygaplencompactx( gaplen2ibestkamo[i], gaplen2, lgth2, 0, i, 0, 0 ); // IRU? // TEST
+
+ extendgaplencompactx( gaplen1jprev[0], gaplen1, i ); // ???
+
+
+// addnewgaplen( gaplen1jprev[0], gaplen1icurr[i-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jprev[0], gaplen2icurr[i-1], gaplen2, lgth2, -1, 0 );
+// copygaplenrestricted( gaplen1jprev[0], gaplen1icurr[i-1], lgth1, -1, 0, i, i ); // i-1, i da to omou.
+ copygaplencompactx( gaplen1jprev[0], gaplen1icurr[i-1], lgth1-i, -1, 0, i, 1 ); // half? lgth1-i?
+// copygaplenrestricted( gaplen2jprev[0], gaplen2icurr[i-1], lgth2, -1, 0, 0, 0 );
+ copygaplencompactx( gaplen2jprev[0], gaplen2icurr[i-1], lgth2-j, -1, 0, 0, 0 ); // half?? lgth2-j?
+
+
+// match_calc( n_dynamicmtx, currentw, cpmx1, cpmx2, i, lgth2, doublework, intwork, 0 );
+#if SLOW
+ match_calc_slow( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, i, lgth2, *doublework, *intwork, 0, 0 );
+#else
+ fillzero( currentw, lgth2 );
+ for( c=0; c<maxdistclass; c++ )
+ {
+ match_calc_add( matrices[c], currentw, cpmx1s[c], cpmx2s[c], i, lgth2, doublework[c], intwork[c], 0 );
+ if( nmask[c] ) match_calc_del( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, i, lgth2, c, nmask[c], masklist1[c], masklist2[c] );
+ }
+#endif
+
+
+#if XXXXXXX
+fprintf( stderr, "\n" );
+fprintf( stderr, "i=%d\n", i );
+fprintf( stderr, "currentw = \n" );
+for( j=0; j<lgth2; j++ )
+{
+ fprintf( stderr, "%5.2f ", currentw[j] );
+}
+fprintf( stderr, "\n" );
+#endif
+ if( localhom )
+ {
+// fprintf( stderr, "Calling imp_match_calc (o) lgth = %d, i = %d\n", lgth1, i );
+#if 0
+ imp_match_out_vead( currentw, i, lgth2 );
+#else
+ imp_match_out_vead( currentw, i, lgth2 );
+#endif
+ }
+#if XXXXXXX
+fprintf( stderr, "\n" );
+fprintf( stderr, "i=%d\n", i );
+fprintf( stderr, "currentw = \n" );
+for( j=0; j<lgth2; j++ )
+{
+ fprintf( stderr, "%5.2f ", currentw[j] );
+}
+fprintf( stderr, "\n" );
+#endif
+ currentw[0] = initverticalw[i];
+
+#if 0
+ fprintf( stderr, "%c ", seq1[0][i] );
+ for( j=0; j<lgth2+1; j++ )
+ {
+ fprintf( stderr, "%5.0f ", currentw[j] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+
+// mi = previousw[0] + ogcp2[1]; mpi = 0;
+
+
+
+
+
+#if ALGZGAP
+ mi = previousw[0] + ogcp2[1] * gapfreq1[i-1]; mpi=0;
+#else
+ pfac = calcpfac_gapex_noidatend( gaplen1, gaplen2, i, 1, i, seq1[0], seq2[0], 1 );
+#if DEBUG
+ reporterr( "%c-%c, INITIAL igap extension check, pfac = %f\n\n", '=', seq2[0][j], pfac );
+#endif
+ mi = previousw[0] + fpenalty * pfac;
+ mpi=0;
+#endif
+ ijppt = ijp[i] + 1;
+ mjpt = m + 1;
+ prept = previousw;
+ curpt = currentw + 1;
+ mpjpt = mp + 1;
+
+
+ if( trywarp )
+ {
+ prevwmrecordspt = prevwmrecords;
+ wmrecordspt = wmrecords+1;
+ wmrecords1pt = wmrecords;
+ warpipt = warpi + 1;
+ warpjpt = warpj + 1;
+ }
+
+
+
+
+// reporterr( "\n\ni=%d, %c\n", i, seq1[0][i] );
+ for( j=1; j<lastj; j++ )
+ {
+
+#if DEBUG
+ reporterr( "***** %c%d-%c%d ******* \n", seq1[0][i], i, seq2[0][j], j );
+ reporterr( "mpi=%d (%c), *mpjpt=%d (%c)\n", mpi, seq2[0][mpi], *mpjpt, seq1[0][*mpjpt] );
+#endif
+
+
+// Hitsuyou na bun dake tsuika
+#if USEGAPLENMTX
+ extendgaplenpartly( gaplen1mtx[i-1][mpi], gaplen1, i, i );
+ extendgaplenpartly( gaplen2mtx[i-1][mpi], gaplen2, j, j );
+ extendgaplenpartly( gaplen1mtx[*mpjpt][j-1], gaplen1, i, i );
+ extendgaplenpartly( gaplen2mtx[*mpjpt][j-1], gaplen2, j, j );
+ extendgaplenpartly( gaplen1mtx[i-1][j-1], gaplen1, i, i );
+ extendgaplenpartly( gaplen2mtx[i-1][j-1], gaplen2, j, j );
+#endif
+#if USEGAPLENHALF
+ extendgaplenpartly( gaplen1half[i-1][mpi], gaplen1+i-1, 1, 1 );
+ extendgaplenpartly( gaplen2half[i-1][mpi], gaplen2+mpi, j-mpi, j-mpi );
+ extendgaplenpartly( gaplen1half[*mpjpt][j-1], gaplen1+*mpjpt, i-*mpjpt, i-*mpjpt );
+ extendgaplenpartly( gaplen2half[*mpjpt][j-1], gaplen2+j-1, 1, 1 );
+ extendgaplenpartly( gaplen1half[i-1][j-1], gaplen1+i-1, 1, 1 );
+ extendgaplenpartly( gaplen2half[i-1][j-1], gaplen2+j-1, 1, 1 );
+
+#endif
+
+// reporterr( "extending gaplen1icurr\n" );
+ extendgaplencompactx( gaplen1icurr[i-1], gaplen1+i-1, 1 ); // iruhazu
+// reporterr( "extending gaplen2icurr\n" );
+ extendgaplencompactx( gaplen2icurr[i-1], gaplen2, j ); // iruhazu
+// reporterr( "extending gaplen1jprev[j-1], j-1=%d\n", j-1 );
+ extendgaplencompactx( gaplen1jprev[j-1], gaplen1, i );
+// reporterr( "extending gaplen1jcurr, j-1=%d\n", j-1 );
+ extendgaplencompactx( gaplen1jcurr[j-1], gaplen1, i );
+// reporterr( "extending gaplen2jprev\n" );
+ extendgaplencompactx( gaplen2jprev[j-1], gaplen2+j-1, 1 );
+// reporterr( "extending gaplen2jcurr\n" );
+ extendgaplencompactx( gaplen2jcurr[j-1], gaplen2+j-1, 1 );
+// reporterr( "extending gaplen1jbest[j-1]\n" );
+ extendgaplencompactx( gaplen1jbest[j-1], gaplen1, i );
+// reporterr( "extending gaplen1jbestkamo[j-1]\n" );
+ extendgaplencompactx( gaplen1jbestkamo[j-1], gaplen1, i );
+// reporterr( "extending gaplen1jprev[mpi], j-1=%d\n", j-1 );
+ extendgaplencompactx( gaplen1jprev[mpi], gaplen1, i ); // full
+// reporterr( "extending gaplen2jprev[mpi]\n" );
+ extendgaplencompactx( gaplen2jprev[mpi], gaplen2+mpi, j-mpi ); // half
+// reporterr( "extending gaplen1ibestkamo[i-1]\n" );
+ extendgaplencompactx( gaplen1ibestkamo[i-1], gaplen1+i-1, 1 );
+// reporterr( "extending gaplen2ibestkamo[i-1]\n" );
+ extendgaplencompactx( gaplen2ibestkamo[i-1], gaplen2, j );
+
+
+#if DEBUG
+ reporterr( "at the head of j loop, i,j=%d,%d, gaplen2jcurr[j]=\n", i, j );
+ showgaplen( gaplen2jcurr[j], 100 );
+
+ reporterr( "at the head of j loop, i,j=%d,%d, gaplen2jcurr[j-1]=\n", i, j );
+ showgaplen( gaplen2jcurr[j-1], 100 );
+
+
+ reporterr( "at the head of j loop, i,j=%d,%d, gaplen2jprev[j]=\n", i, j );
+ showgaplen( gaplen2jprev[j], 100 );
+
+ reporterr( "at the head of j loop, i,j=%d,%d, gaplen2jprev[j-1]=\n", i, j );
+ showgaplen( gaplen2jprev[j-1], 100 );
+#endif
+
+
+#ifdef xxxenablemultithread
+// fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+ if( chudanpt && *chudanpt != chudanref )
+ {
+ cleargaplens( gaplens );
+ if( masklist1 ) FreeIntMtx( masklist1 ); masklist1 = NULL;
+ if( masklist2 ) FreeIntMtx( masklist2 ); masklist2 = NULL;
+ if( nmask ) free( nmask ); nmask = NULL;
+// fprintf( stderr, "\n\n## CHUUDAN!!! S\n" );
+ *chudanres = 1;
+ return( -1.0 );
+ }
+#endif
+#if USEGAPLENHALF
+// i==248, j==80 wo check
+#if DEBUG80
+ if( j==80 )
+ {
+ reporterr( "When i==%d, j==%d,\n", i, j );
+ reporterr( "gaplen2jprev[j-1=%d]=\n", j-1 );
+ showgaplen( gaplen2jprev[j-1], lgth1 );
+ reporterr( "gaplen2half[i-1=%d][j-1=%d]=\n", i-1, j-1 );
+ showgaplen( gaplen2half[i-1][j-1], lgth1 );
+ }
+ if( j==79 )
+ {
+ reporterr( "When i==%d, j==%d,\n", i, j );
+ reporterr( "gaplen2jprev[j-1=%d]=\n", j-1 );
+ showgaplen( gaplen2jprev[j-1], lgth1 );
+ reporterr( "gaplen2half[i-1=%d][j-1=%d]=\n", i-1, j-1 );
+ showgaplen( gaplen2half[i-1][j-1], lgth1 );
+ }
+#endif
+#endif
+
+
+// pfac = calcpfac( gaplen1jprev[j-1], gaplen2jprev[j-1], i, j, seq1[0], seq2[0] );
+//reporterr( "#### COMPACT, i,j=%d,%d\n", i, j );
+ pfac = calcpfacnoidatend( gaplen1jprev[j-1], gaplen2jprev[j-1], i, 1, seq1[0], seq2[0]+j, one ); // 1j->full, 2j->half
+#if USEGAPLENMTX
+//reporterr( "#### FULL, i,j=%d,%d\n", i, j );
+ pfactmp = calcpfac( gaplen1mtx[i-1][j-1], gaplen2mtx[i-1][j-1], i, j, seq1[0], seq2[0], one );
+#endif
+#if USEGAPLENHALF
+//reporterr( "#### HALF, i,j=%d/%d,%d/%d\n", i, lgth1, j, lgth2 );
+// showgaplen( gaplen2half[i-1][j-1], lgth2-j );
+ pfactmp = calcpfac( gaplen1half[i-1][j-1], gaplen2half[i-1][j-1], 1, 1, seq1[0]+i, seq2[0]+j, zero );
+#endif
+#if USEGAPLENMTX + USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(straight) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j );
+ PFACERROR = 1;
+ exit( 1 );
+ }
+#endif
+//if( i==50 && j==135 ) exit( 1 );
+
+
+// reporterr( "i,j=%d,%d, *prept = %f\n", i, j, *prept );
+
+#if ALGZSTRAIGHT
+ wm = *prept; // Machigai!!
+#else
+ wm = *prept + fpenalty * pfac;
+#endif
+ *ijppt = 0;
+
+
+#if DEBUG
+ if( i == j )
+ {
+ fprintf( stderr, "\n i=%d, j=%d %c, %c ", i, j, seq1[0][i], seq2[0][j] );
+ fprintf( stderr, "%5.0f, pfac for straight =%f\n", wm, pfac );
+ }
+#endif
+ newgaplen = j-mpi-1;
+
+
+//if( i == 53 && j == 93 ) exit( 1 );
+
+
+
+
+// pfac = calcpfac_gap_incomplete( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], newgaplen, i, j, seq1[0], seq2[0], 0 ); // i-1
+ pfac = calcpfac_gap_noidatend( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], newgaplen, 1, j, seq1[0]+i-1, seq2[0], 0 ); // i-1
+#if USEGAPLENMTX
+ pfactmp = calcpfac_gap_incomplete( gaplen1mtx[i-1][mpi], gaplen2mtx[i-1][mpi], newgaplen, i, j, seq1[0], seq2[0], 1 );
+#endif
+#if USEGAPLENHALF
+ pfactmp = calcpfac_gap_incomplete( gaplen1half[i-1][mpi], gaplen2half[i-1][mpi], newgaplen, 1, j-mpi, seq1[0]+i-1, seq2[0]+mpi, 1 );
+#endif
+#if USEGAPLENMTX || USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(igap) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j );
+ PFACERROR = 1;
+ }
+#endif
+
+
+#if DEBUG
+ reporterr( "%c-%c pfac for igap end incomplete = %f\n", seq1[0][i], seq2[0][j], pfac );
+ reporterr( "mi when igap end checking = %f\n", mi );
+ reporterr( "wm = %f, mi+fpenalty*pfac=%f\n", wm, mi+fpenalty*pfac );
+#endif
+
+
+#if ALGZGAP
+ if( (g=mi+*fgcp2pt*gf1va) > wm )
+#else
+ if( (g=mi+fpenalty*pfac) > wm )
+#endif
+ {
+ wm = g;
+ *ijppt = -( j - mpi );
+#if DEBUG80
+ reporterr( "Selected as igap end! wm = %f, mi = %f\n", wm, mi );
+ fprintf( stderr, "Jump from %d-%d (%c-%c) to %d (%c-%c)!\n", i, j, seq1[0][i], seq2[0][j], mpi, seq1[0][i-1], seq2[0][mpi] );
+#endif
+ }
+
+
+#if 0
+ fprintf( stderr, "%5.0f->", wm );
+#endif
+// if( (g=*mjpt+ fgcp1va* *gf2pt) > wm )
+
+#if 0
+// reporterr( "Checking %c, (current pos = %c)\n", seq2[0][j+1], seq2[0][j] );
+ sfac = 1.0;
+ for( k=0; gaplen2[j+1]&&(gl=gaplen2[j+1][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou
+ {
+// reporterr( ".len = %d, .relend = %d\n", gaplen2[j+1][k].len, gaplen2[j+1][k].relend );
+ if( gl - 1 == gaplen2[j+1][k].relend )
+ {
+ sfac -= gaplen2[j+1][k].freq;
+// reporterr( "Hit! sfac = %f\n", sfac );
+ }
+ }
+ sfac2 = 1.0;
+ for( k=0; gaplen1[i+1]&&(gl=gaplen1[i+1][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou
+ if( gaplen1[i+1][k].relend != -1 ) sfac2 -= gaplen1[i+1][k].freq;
+ sfac *= sfac2;
+#else
+// sfac = 0.0;
+#endif
+
+
+
+#if ALGZGAP
+ if( (g=*prept+*ogcp2pt*gf1vapre) >= mi )
+#else
+// if( (g=*prept + fpenalty * sfac ) >= mi )
+ if( (g=*prept ) >= mi )
+#endif
+ {
+// mpibk = mpi;
+// mi = g - fpenalty * sfac;
+ mi = g;
+ mpi = j-1;
+#if DEBUG80
+ reporterr( "Selected as igap start! %c%d-%c%d, mi=%f, g=%f\n", seq1[0][i-1], i-1, seq2[0][mpi], mpi, mi, g );
+#endif
+
+#if FREEFREQUENTLY
+// freegaplenpartly( gaplen1ibestkamo[i-1], 0, i-1 );
+ freegaplenpartly( gaplen2ibestkamo[i-1], j-3, j-2 );
+#endif
+// freegaplenpartly( gaplen1jprev[mpibk], 0, lgth2 ); // full
+// freegaplenpartly( gaplen2jprev[mpibk], 0, lgth2-mpibk ); // half
+// if( gaplen1jprev[mpibk] ) FreeGaplenMtx( gaplen1jprev[mpibk], 0 );
+// gaplen1jprev[mpibk] = NULL;
+// if( gaplen2jprev[mpibk] ) FreeGaplenMtx( gaplen2jprev[mpibk], 0 );
+// gaplen2jprev[mpibk] = NULL;
+
+
+// addnewgaplen( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 );
+// copygaplenrestricted( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // i-1, i
+ copygaplencompactx( gaplen1ibestkamo[i-1], gaplen1jprev[j-1], lgth1, -1, 0, 1, i ); // half
+// copygaplenrestricted( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); // mpi, j
+ copygaplencompactx( gaplen2ibestkamo[i-1], gaplen2jprev[j-1], lgth2, -1, 0, j, 1 ); //half
+
+
+ }
+
+
+
+
+
+
+// reporterr( "g=%f, *prept=%f, mi=%f\n", g, *prept, mi );
+
+
+#if USE_PENALTY_EX
+ mi += fpenalty_ex;
+#endif
+
+#if ALGZGAP
+ pfac = 0.0; // CHUUI!
+#else
+
+// pfac = calcpfac_gapex( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], i, j, j-mpi, seq1[0], seq2[0], 1 ); // i-1
+ pfac = calcpfac_gapex_noidatend( gaplen1ibestkamo[i-1], gaplen2ibestkamo[i-1], 1, j, j-mpi, seq1[0]+i, seq2[0], 1 ); // 1ibest->half, 2ibest->full
+#if USEGAPLENMTX
+ pfactmp = calcpfac_gapex( gaplen1mtx[i-1][mpi], gaplen2mtx[i-1][mpi], i, j, j-mpi, seq1[0], seq2[0], 1 );
+#endif
+#if USEGAPLENHALF
+ pfactmp = calcpfac_gapex( gaplen1half[i-1][mpi], gaplen2half[i-1][mpi], 1, j-mpi, j-mpi, seq1[0]+i, seq2[0]+mpi, 1 );
+#endif
+#if USEGAPLENMTX || USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(igapex) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j );
+ PFACERROR = 1;
+ }
+#endif
+
+
+
+
+
+
+
+#if DEBUG
+ reporterr( "%c-%c, igap extension check, pfac = %f\n\n", '=', seq2[0][j], pfac );
+#endif
+#endif
+// reporterr( "mi = %f -> ", mi );
+ mi += fpenalty * pfac;
+// reporterr( "mi = %f\n", mi );
+
+
+// reporterr( "using %d-%d, %d, %d\n", *mpjpt, j-1, i, j );
+ newgaplen = i-*mpjpt-1;
+// pfac = calcpfac_gap_incomplete( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], newgaplen, j, i, seq2[0], seq1[0], 0 ); // j-1 deha???
+
+
+ pfac = calcpfac_gap_noidatend( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], newgaplen, 1, i, seq2[0]+j-1, seq1[0], 1 ); // 2jbestkamo->half, 1jbestkamo->full
+#if USEGAPLENMTX
+ pfactmp = calcpfac_gap_incomplete( gaplen2mtx[*mpjpt][j-1], gaplen1mtx[*mpjpt][j-1], newgaplen, j, i, seq2[0], seq1[0], 1 );
+#endif
+#if USEGAPLENHALF
+ pfactmp = calcpfac_gap_incomplete( gaplen2half[*mpjpt][j-1], gaplen1half[*mpjpt][j-1], newgaplen, 1, i-*mpjpt, seq2[0]+j-1, seq1[0]+*mpjpt, 1 );
+#endif
+#if USEGAPLENMTX || USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(jgap) pfac=%f, but pfactmp=%f (i,j=%d,%d)\n", pfac, pfactmp, i, j );
+// exit( 1 );
+ PFACERROR = 1;
+ }
+#endif
+
+#if ALGZGAP
+ if( (g=*mjpt+ fgcp1va* *gf2pt) > wm )
+#else
+ if( (g=*mjpt + fpenalty*pfac) > wm )
+#endif
+ {
+ wm = g;
+ *ijppt = +( i - *mpjpt );
+
+
+#if FREEFREQUENTLY
+ freegaplenpartly( gaplen1jbest[j-1], i-3, i-2 );
+// freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 );
+#endif
+
+
+#if DEBUG
+ reporterr( "Selected as jgap end!, pfac = %f\n", pfac );
+ fprintf( stderr, "Jump from %d (%c) to %d (%c)!\n", j, seq1[0][j], *mpjpt, seq1[0][*mpjpt] );
+#endif
+// addnewgaplen( gaplen1jbest[j-1], gaplen1jbestkamo[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], gaplen2, lgth2, -1, 0 );
+ copygaplencompactx( gaplen1jbest[j-1], gaplen1jbestkamo[j-1], lgth1, -1, 0, i, i );// *mpjpt, i
+// copygaplenrestricted( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], lgth2, -1, 0, j, j ); // j-1, j
+ copygaplencompactx( gaplen2jbest[j-1], gaplen2jbestkamo[j-1], lgth2, -1, 0, 1, 1 ); // half!
+
+
+
+
+ }
+
+
+// extendgaplenpartly( gaplen1jbest[j-1], gaplen1, i, i ); // tmptmptmp
+// extendgaplenpartly( gaplen2jbest[j-1], gaplen2, 0, 0 ); // tmptmptmp
+
+#if 0
+ sfac = 1.0;
+ for( l=0; gaplen1[i+1]&&(gl=gaplen1[i+1][l].len)!=-1; l++ ) // ososugi! hash ni atode henkou
+ if( gl - 1 == gaplen1[i+1][l].relend ) sfac -= gaplen1[i+1][l].freq;
+ sfac2 = 1.0;
+ for( k=0; gaplen2[j+1]&&(gl=gaplen2[j+1][k].len)!=-1; k++ ) // ososugi! hash ni atode henkou
+ if( gaplen2[j+1][k].relend != -1 ) sfac2 -= gaplen2[j+1][k].freq;
+ sfac *= sfac2;
+#else
+// sfac = 0.0;
+#endif
+
+#if DEBUG
+// Debug trace printed before the jgap-start comparison below.
+// The format string has exactly three conversions (%d, %f, %f), so exactly
+// three values are passed: the row index and the two scores being compared.
+// Previously two extra char arguments sat between i and *prept, which made
+// the %f conversions read mismatched arguments (undefined behaviour).
+			reporterr( " (jgap start check i=%d) -> *prept=%f, *mjpt=%f\n", i, *prept, *mjpt );
+#endif
+
+#if ALGZGAP
+ if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt )
+#else
+// if( (g=*prept + fpenalty * sfac ) >= *mjpt )
+ if( (g=*prept ) >= *mjpt )
+#endif
+ {
+// *mjpt = g - fpenalty * sfac;
+ *mjpt = g;
+ *mpjpt = i-1;
+#if DEBUG
+ reporterr( "Selected as jgap start!\n" );
+#endif
+
+
+#if FREEFREQUENTLY
+ freegaplenpartly( gaplen1jbestkamo[j-1], i-3, i-2 );
+// freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 );
+#endif
+
+
+// addnewgaplen( gaplen1jbestkamo[j-1], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 );
+// reporterr( "copying gaplen1jbestkamo[%d-1] from galpen1jprev, j=%d, i=%d\n", j, j, i );
+ copygaplencompactx( gaplen1jbestkamo[j-1], gaplen1jprev[j-1], lgth1, -1, 0, i, i ); // *mpjpt, i
+// copygaplenrestricted( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2, -1, 0, j, j ); // j-1, j
+// copygaplencompactx( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2, -1, 0, j, 1 ); // half!
+// reporterr( "copying gaplen2jbestkamo[%d-1] from galpen2jprev\n", j );
+ copygaplencompactx( gaplen2jbestkamo[j-1], gaplen2jprev[j-1], lgth2-j, -1, 0, 1, 1 ); // ryouhou half!
+
+
+// if( j==2 && i==1 ) exit( 1 );
+
+
+
+ }
+
+// extendgaplenpartly( gaplen1ibestkamo[i-1], gaplen1, 0, 0 ); // tmptmptmp
+// extendgaplenpartly( gaplen2ibestkamo[i-1], gaplen2, j, j ); // tmptmptmp
+
+
+// extendgaplenpartly( gaplen1jbestkamo[j-1], gaplen1, i, i ); // tmptmptmp
+// extendgaplenpartly( gaplen2jbestkamo[j-1], gaplen2, 0, 0 ); // tmptmptmp
+
+
+#if USE_PENALTY_EX
+ m[j] += fpenalty_ex;
+#endif
+
+#if ALGZGAP
+ pfac = 0.0;
+#else
+
+// pfactmp = calcpfac_gapex( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], j, i, i-*mpjpt, seq2[0], seq1[0], 0 ); // j-1
+ pfactmp = calcpfac_gapex_noidatend( gaplen2jbestkamo[j-1], gaplen1jbestkamo[j-1], 1, i, i-*mpjpt, seq2[0]+j, seq1[0], 0 ); // 2jbestkamo->half, 1jbestkamo->full
+#if USEGAPLENMTX
+ pfac = calcpfac_gapex( gaplen2mtx[*mpjpt][j-1], gaplen1mtx[*mpjpt][j-1], j, i, i-*mpjpt, seq2[0], seq1[0], 0 );
+#endif
+#if USEGAPLENHALF
+ pfac = calcpfac_gapex( gaplen2half[*mpjpt][j-1], gaplen1half[*mpjpt][j-1], 1, i-*mpjpt, i-*mpjpt, seq2[0]+j, seq1[0]+*mpjpt, 0 );
+#endif
+#if USEGAPLENMTX || USEGAPLENHALF
+ if( pfac != pfactmp )
+ {
+ reporterr( "(jgapex) pfac=%f, but pfactmp=%f (i,j=%d,%d) diff=%f\n", pfac, pfactmp, i, j, pfac-pfactmp );
+// exit( 1 );
+ PFACERROR = 1;
+ }
+#endif
+ pfac = pfactmp;
+#if DEBUG
+ reporterr( "%c-%c, jgap extension check (j=%d), pfac = %f\n", seq1[0][i], '=', j, pfac );
+#endif
+#endif
+ m[j] += fpenalty * pfac;
+
+
+
+ if( trywarp )
+ {
+#if USE_PENALTY_EX
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai
+#else
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai
+#endif
+ {
+// fprintf( stderr, "WARP!!\n" );
+ if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+ {
+ *ijppt = warpbase + warpn - 1;
+ }
+ else
+ {
+// Register a new warp origin: encode its index in the traceback cell, grow
+// both coordinate arrays by one slot and store the source cell (i, j).
+					*ijppt = warpbase + warpn;
+					warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+					warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+// realloc returns NULL on failure; stop here rather than writing through a
+// null pointer in the two stores below. NOTE(review): ErrorExit is presumably
+// fatal, as it is used for the length check later in this function -- confirm.
+					if( warpis == NULL || warpjs == NULL ) ErrorExit( "Cannot allocate warpis/warpjs\n" );
+					warpis[warpn] = prevwarpi[j-1];
+					warpjs[warpn] = prevwarpj[j-1];
+					warpn++;
+ }
+ wm = g;
+ }
+
+#if 0
+ fprintf( stderr, "%5.0f ", wm );
+#endif
+ curm = *curpt + wm;
+
+ if( *wmrecords1pt > *wmrecordspt )
+ {
+ *wmrecordspt = *wmrecords1pt;
+ *warpipt = *(warpipt-1);
+ *warpjpt = *(warpjpt-1);
+ }
+ if( curm > *wmrecordspt )
+ {
+ *wmrecordspt = curm;
+ *warpipt = i;
+ *warpjpt = j;
+ }
+ wmrecordspt++;
+ wmrecords1pt++;
+ warpipt++;
+ warpjpt++;
+ }
+
+#if DEBUG
+ reporterr( "extention-x 1j???, before extention-x, j=%d\n", j );
+ showgaplen( gaplen1jcurr[j], 100 );
+#endif
+ extendgaplencompactx( gaplen1jcurr[j], gaplen1, i );
+
+#if DEBUG
+ reporterr( "after extention-x\n" );
+ showgaplen( gaplen1jcurr[j], 100 );
+ reporterr( "extention-x 2j???\n" );
+#endif
+ extendgaplencompactx( gaplen2jcurr[j], gaplen2+j, 0 );
+
+
+#if 1
+ if( *ijppt < 0 )
+ {
+#if DEBUG
+ reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-1, j+*ijppt, i, seq1[0][i], j, seq2[0][j], *ijppt );
+ reporterr( "Inserting %d gaps to gaplen1 and copying gaplen2 (%c%d-%c%d)\n", -*ijppt-1, seq1[0][i], i, seq2[0][j], j );
+#endif
+#if USEGAPLENMTX
+// addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-1][j+*ijppt], gaplen1, lgth1, i, -*ijppt-1 );
+// addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-1][j+*ijppt], gaplen2, lgth2, -1, 0 );
+ copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-1][j+*ijppt], lgth1, i, -*ijppt-1, i, i ); // 0, lgth1
+ copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-1][j+*ijppt], lgth2, -1, 0, j, j ); // 0, lgth2
+#endif
+#if USEGAPLENHALF
+ copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-1][j+*ijppt], lgth1, 0, -*ijppt-1, 0, 0, 1, 1 ); // 0, lgth1
+ copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-1][j+*ijppt], lgth2, -1, 0, 0, 0, -*ijppt, -*ijppt ); // 0, lgth2
+#endif
+// addnewgaplen( gaplen1jcurr[j], gaplen1jprev[j+*ijppt], gaplen1, lgth1, i, -*ijppt-1 );
+// addnewgaplen( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], gaplen2, lgth2, -1, 0 );
+// reporterr( "copying gaplen1jcurr from gaplen1jbest, with a %d insertion\n", -*ijppt-1 );
+ copygaplencompactx( gaplen1jcurr[j], gaplen1jprev[j+*ijppt], lgth1, i, -*ijppt-1, i, i ); // scope: i+*ijppt+1, i ?
+// reporterr( "copy end\n" );
+// copygaplenrestricted( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], lgth2, -1, 0, j, j );
+ copygaplencompactx( gaplen2jcurr[j], gaplen2jprev[j+*ijppt], lgth2, -1, 0, 0, -*ijppt ); // half! ryouho zureteru
+ }
+ else if( *ijppt > 0 )
+ {
+#if DEBUG
+ reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-*ijppt, j-1, i, seq1[0][i], j, seq2[0][j], *ijppt );
+ reporterr( "Copying gaplen1 inserting %d gaps to gaplen2 (%c%d-%c%d)\n", *ijppt-1, seq1[0][i], i, seq2[0][j], j );
+#endif
+#if USEGAPLENMTX
+// addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-*ijppt][j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-*ijppt][j-1], gaplen2, lgth2, j, *ijppt-1 );
+ copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-*ijppt][j-1], lgth1, -1, 0, i, i ); // 0, lgth1
+ copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-*ijppt][j-1], lgth2, j, *ijppt-1, j, j ); // 0, lgth2
+#endif
+#if USEGAPLENHALF
+ copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-*ijppt][j-1], lgth1, -1, 0, 0, 0, *ijppt, *ijppt ); // 0, lgth1
+ copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-*ijppt][j-1], lgth2, 0, *ijppt-1, 0, 0, 1, 1 ); // 0, lgth2
+#endif
+// addnewgaplen( gaplen1jcurr[j], gaplen1jbest[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jcurr[j], gaplen2jbest[j-1], gaplen2, lgth2, j, *ijppt-1 );
+ copygaplencompactx( gaplen1jcurr[j], gaplen1jbest[j-1], lgth1, -1, 0, i, i );
+// copygaplenrestricted( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, j, *ijppt-1, j, j ); // j-*ijppt+1?
+// copygaplenrestricted_zurasu( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, 0, *ijppt-1, 0, 0, j, j ); // 2jcurr->half, but 2jbest->full, imanotokoro
+ copygaplencompactx( gaplen2jcurr[j], gaplen2jbest[j-1], lgth2, 0, *ijppt-1, 0, 1 ); //ryouhou half
+
+ }
+ else
+#endif
+ {
+#if DEBUG
+ reporterr( "Path: %d-%d->%d-%d, i=%d (%c), j=%d (%c), *ijppt=%d\n", i, j, i-1, j-1, i, seq1[0][i], j, seq2[0][j], *ijppt );
+ reporterr( "Copying gaplen1 and gaplen2 (%c%d-%c%d)\n", seq1[0][i], i, seq2[0][j], j );
+#endif
+#if USEGAPLENMTX
+// addnewgaplen( gaplen1mtx[i][j], gaplen1mtx[i-1][j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2mtx[i][j], gaplen2mtx[i-1][j-1], gaplen2, lgth2, -1, 0 );
+ copygaplenrestricted( gaplen1mtx[i][j], gaplen1mtx[i-1][j-1], lgth1, -1, 0, i, i ); // 0, lgth1
+ copygaplenrestricted( gaplen2mtx[i][j], gaplen2mtx[i-1][j-1], lgth2, -1, 0, j, j ); // 0, lgth2
+#endif
+#if USEGAPLENHALF
+ copygaplenrestricted_zurasu( gaplen1half[i][j], gaplen1half[i-1][j-1], lgth1, -1, 0, 0, 0, 1, 1 ); // 0, lgth1
+ copygaplenrestricted_zurasu( gaplen2half[i][j], gaplen2half[i-1][j-1], lgth2, -1, 0, 0, 0, 1, 1 ); // 0, lgth2
+#endif
+// addnewgaplen( gaplen1jcurr[j], gaplen1jprev[j-1], gaplen1, lgth1, -1, 0 );
+// addnewgaplen( gaplen2jcurr[j], gaplen2jprev[j-1], gaplen2, lgth2, -1, 0 );
+ copygaplencompactx( gaplen1jcurr[j], gaplen1jprev[j-1], lgth1, -1, 0, i, i );
+// copygaplenrestricted( gaplen2jcurr[j], gaplen2jprev[j-1], lgth2, -1, 0, j, j );
+ copygaplencompactx( gaplen2jcurr[j], gaplen2jprev[j-1], lgth2, -1, 0, 0, 1 ); // half
+ }
+
+#if DEBUG
+ reporterr( "at the end of j loop, gaplen1jcurr[%d] = \n", j );
+ showgaplen( gaplen1jcurr[j], 100 );
+ reporterr( "at the end of j loop, gaplen1prev[%d] = \n", j );
+ showgaplen( gaplen1jprev[j], 100 );
+#endif
+
+
+
+#if 1
+ freegaplenpartly( gaplen1jcurr[j-1], i-3, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // half!
+ freegaplenpartly( gaplen1jbestkamo[j-1], i-3, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); // -1 dehanaku -2??
+ freegaplenpartly( gaplen1jbest[j-1], i-3, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); // -1 dehanaku -2??
+#else
+ freegaplenpartly( gaplen1jprev[j-1], 0, i-2 ); // -1 dehanaku -2??
+ freegaplenpartly( gaplen1jcurr[j-1], 0, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jcurr[j-1], j-3, j-2 ); // half!
+ freegaplenpartly( gaplen1jbestkamo[j-1], 0, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jbestkamo[j-1], j-3, j-2 ); // -1 dehanaku -2??
+ freegaplenpartly( gaplen1jbest[j-1], 0, i-2 ); // -1 dehanaku -2??
+// freegaplenpartly( gaplen2jbest[j-1], j-3, j-2 ); // -1 dehanaku -2??
+#endif
+
+
+#if USEGAPLENMTX
+// freegaplenpartly( gaplen1mtx[i-1][j-1], 0, i-2 );
+// freegaplenpartly( gaplen2mtx[i-1][j-1], 0, j-2 );
+#endif
+
+
+ *curpt++ += wm;
+ ijppt++;
+ mjpt++;
+ prept++;
+ mpjpt++;
+ }
+ lastverticalw[i] = currentw[lgth2-1];
+
+#if 1
+// freegaplenpartly( gaplen1icurr[i-1], i-1, i-1 );
+ freegaplenpartly( gaplen1icurr[i-1], 0, lgth1-i );
+ freegaplenpartly( gaplen2icurr[i-1], 0, lgth2 );
+// freegaplenpartly( gaplen1ibestkamo[i-1], i-1, i-1 );
+ freegaplenpartly( gaplen1ibestkamo[i-1], 0, lgth1-i );
+ freegaplenpartly( gaplen2ibestkamo[i-1], 0, lgth2 );
+#endif
+
+ if( trywarp )
+ {
+ fltncpy( prevwmrecords, wmrecords, lastj );
+ intncpy( prevwarpi, warpi, lastj );
+ intncpy( prevwarpj, warpj, lastj );
+ }
+#if 0
+ fprintf( stderr, "i=%d, %15.5f \n", i, wm );
+#endif
+//if( i == 2 ) exit( 1 );
+ }
+
+ if( trywarp )
+ {
+// fprintf( stderr, "wm = %f\n", wm );
+// fprintf( stderr, "warpn = %d\n", warpn );
+ free( wmrecords );
+ free( prevwmrecords );
+ free( warpi );
+ free( warpj );
+ free( prevwarpi );
+ free( prevwarpj );
+ }
+
+
+#if OUTGAP0TRY
+// Only compiled when OUTGAP0TRY is enabled. When outgap is off, subtract
+// offset * (remaining length) / 2.0 from every entry of the last row
+// (currentw) and the last column (lastverticalw).
+// The column update used to read ( lgth1 - i / 2.0 ), i.e. only i was halved;
+// it now mirrors the row update so both borders get the same correction.
+// NOTE(review): fix is based on symmetry with the row formula -- confirm
+// against the intended scoring before enabling OUTGAP0TRY.
+	if( !outgap )
+	{
+		for( j=1; j<lgth2+1; j++ )
+			currentw[j] -= offset * ( lgth2 - j ) / 2.0;
+		for( i=1; i<lgth1+1; i++ )
+			lastverticalw[i] -= offset * ( lgth1 - i ) / 2.0;
+	}
+#endif
+
+ /*
+ fprintf( stderr, "\n" );
+ for( i=0; i<icyc; i++ ) fprintf( stderr,"%s\n", seq1[i] );
+ fprintf( stderr, "#####\n" );
+ for( j=0; j<jcyc; j++ ) fprintf( stderr,"%s\n", seq2[j] );
+ fprintf( stderr, "====>" );
+ for( i=0; i<icyc; i++ ) strcpy( mseq1[i], seq1[i] );
+ for( j=0; j<jcyc; j++ ) strcpy( mseq2[j], seq2[j] );
+ */
+ if( localhom )
+ {
+ Atracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, warpis, warpjs, warpbase );
+ }
+ else
+ Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, tailgp, warpis, warpjs, warpbase );
+
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
+
+// fprintf( stderr, "### impmatch = %f\n", *impmatch );
+
+ resultlen = strlen( mseq1[0] );
+ if( alloclen < resultlen || resultlen > N )
+ {
+ fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
+ ErrorExit( "LENGTH OVER!\n" );
+ }
+
+
+ for( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );
+ for( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );
+#if 0
+ fprintf( stderr, "\n" );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, "%s\n", mseq1[i] );
+ fprintf( stderr, "#####\n" );
+ for( j=0; j<jcyc; j++ ) fprintf( stderr, "%s\n", mseq2[j] );
+#endif
+
+// reporterr( "clearing\n" );
+ cleargaplens( gaplens );
+// Release the mask tables and the mask counter if they are set.
+// Each pointer is reset to NULL unconditionally, whether or not anything
+// was freed; the reset is written on its own line so it does not look like
+// part of the if-statement.
+	if( masklist1 != NULL )
+		FreeIntMtx( masklist1 );
+	masklist1 = NULL;
+
+	if( masklist2 != NULL )
+		FreeIntMtx( masklist2 );
+	masklist2 = NULL;
+
+	if( nmask != NULL )
+		free( nmask );
+	nmask = NULL;
+
+#if USEGAPLENMTX
+/* free on every call */
+ reporterr( "Freeing!\n" );
+ for( i=0; i<lgth1+1; i++ )
+ {
+ for( j=0; j<lgth2+1; j++ )
+ {
+ if( gaplen1mtx[i][j] ) FreeGaplenMtx( gaplen1mtx[i][j], 0 );
+ gaplen1mtx[i][j] = NULL;
+ }
+ free( gaplen1mtx[i] );
+ gaplen1mtx[i] = NULL;
+ }
+ free( gaplen1mtx );
+ gaplen1mtx = NULL;
+
+ for( i=0; i<lgth1+1; i++ )
+ {
+ for( j=0; j<lgth2+1; j++ )
+ {
+ if( gaplen2mtx[i][j] ) FreeGaplenMtx( gaplen2mtx[i][j], 0 );
+ gaplen2mtx[i][j] = NULL;
+ }
+ free( gaplen2mtx[i] );
+ gaplen2mtx[i] = NULL;
+ }
+ free( gaplen2mtx );
+ gaplen2mtx = NULL;
+#endif
+
+
+#if USEGAPLENHALF
+ for( i=0; i<lgth1+1; i++ )
+ {
+ for( j=0; j<lgth2+1; j++ )
+ {
+ if( gaplen1half[i][j] ) FreeGaplenMtx( gaplen1half[i][j], 0 );
+ gaplen1half[i][j] = NULL;
+ }
+ free( gaplen1half[i] );
+ gaplen1half[i] = NULL;
+ }
+ free( gaplen1half );
+ gaplen1half = NULL;
+
+ for( i=0; i<lgth1+1; i++ )
+ {
+ for( j=0; j<lgth2+1; j++ )
+ {
+ if( gaplen2half[i][j] ) FreeGaplenMtx( gaplen2half[i][j], 0 );
+ gaplen2half[i][j] = NULL;
+ }
+ free( gaplen2half[i] );
+ gaplen2half[i] = NULL;
+ }
+ free( gaplen2half );
+ gaplen2half = NULL;
+#endif
+/* free on every call */
+
+
+#if WMCHECK
+ fprintf( stderr, "wm = %f\n", wm - *impmatch);
+ fprintf( stderr, "*impmatch = %f\n", *impmatch);
+
+ int kenzan = 0;
+ for( i=0; i<icyc; i++ ) for( j=0; j<jcyc; j++ )
+ {
+ kenzan += pairgapcount( mseq1[i], mseq2[j] );
+ }
+
+
+ reporterr( "kenzan = %d -> %f\n", kenzan, (double)kenzan /( icyc*jcyc ) );
+
+ double pairscore, nogappairscore, diff;
+ char **pseq;
+ pseq = AllocateCharMtx( 2, strlen( seq1[0] ) + 1 );
+ pairscore = nogappairscore = 0.0;
+#if 1
+ for( i=0; i<icyc; i++ ) for( j=0; j<jcyc; j++ )
+ {
+ strcpy( pseq[0], seq1[i] );
+ strcpy( pseq[1], seq2[j] );
+ commongappick( 2, pseq );
+ c = which[i][j];
+ pairscore += eff1[i] * eff2[j] * naivepairscore11_dynmtx( matrices[c], pseq[0], pseq[1], penalty );
+ nogappairscore += eff1[i] * eff2[j] * naivepairscore11_dynmtx( matrices[c], pseq[0], pseq[1], 0 );
+ }
+#else
+ for( c=0; c<maxdistclass; c++ )
+ {
+ for( i=0; i<icyc; i++ ) for( j=0; j<jcyc; j++ )
+ {
+ strcpy( pseq[0], seq1[i] );
+ strcpy( pseq[1], seq2[j] );
+ commongappick( 2, pseq );
+ pairscore += eff1s[c][i] * eff2s[c][j] * naivepairscore11_dynmtx( matrices[c], pseq[0], pseq[1], penalty );
+ nogappairscore += eff1s[c][i] * eff2s[c][j] * naivepairscore11_dynmtx( matrices[c], pseq[0], pseq[1], 0 );
+ }
+ }
+#endif
+
+ FreeCharMtx( pseq );
+ diff = (pairscore - wm + *impmatch ) / (double)strlen( seq1[0] );
+ reporterr( "pairscore = %f\n", (double)pairscore );
+ reporterr( "pairscore-nogappairscore = %f\n", (double)(pairscore-nogappairscore) );
+ reporterr( "pairscore-nogappairscore / penalty = %f\n", (double)(pairscore-nogappairscore)/(double)(fpenalty) );
+ reporterr( "diff = %f\n\n", diff );
+
+#if 1
+ if( ( !trywarp && fabs( diff ) > 0.01 ) || PFACERROR )
+// if( abs( pairscore - wm +*impmatch ) > 0.01 )
+#else
+ if( abs( pairscore - wm +*impmatch ) > 0.01 )
+#endif
+// if( abs( pairscore - wm +*impmatch ) > 0.01 )
+ {
+ for( i=0; i<icyc; i++ )
+ printf( ">group1\n%s\n", seq1[i] );
+ for( j=0; j<jcyc; j++ )
+ printf( ">group2\n%s\n", seq2[j] );
+ exit( 1 );
+ }
+#else
+ reporterr( "\n" );
+#endif
+
+#if 0
+// if( strlen( seq1[0] ) - lgth1 > 100 && icyc > 1 || strlen( seq2[0] ) - lgth2 > 100 & jcyc > 1 )
+ if( strstr( seq1[0], "LNDDAT" ) && icyc == 1 || strstr( seq2[0], "LNDDAT" ) && jcyc==1)
+ {
+ for( i=0; i<icyc; i++ )
+ printf( ">group1\n%s\n", seq1[i] );
+ for( j=0; j<jcyc; j++ )
+ printf( ">group2\n%s\n", seq2[j] );
+ exit( 1 );
+ }
+#endif
+
+
+ return( wm );
+}
if( seg == NULL )
{
- free( work ); work = NULL;
+ if( work ) free( work );
+ work = NULL;
+ allo = 0;
return;
}
}
-double Fgetlag( char **seq1, char **seq2,
+double Fgetlag(
+ double **n_dynamicmtx,
+ char **seq1, char **seq2,
double *eff1, double *eff2,
int clus1, int clus2,
int alloclen )
int count, count0;
int len1, len2;
int totallen;
- float dumfl = 0.0;
+ double dumdb = 0.0;
int headgp, tailgp;
len1 = strlen( seq1[0] );
Aalign( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen );
break;
case( 'M' ):
- MSalignmm( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, headgp, tailgp );
+ MSalignmm( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, headgp, tailgp );
break;
case( 'A' ):
if( clus1 == 1 && clus2 == 1 )
- G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp );
+ G__align11( n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp );
else
- A__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, headgp, tailgp );
- break;
- case( 'H' ):
- if( clus1 == 1 && clus2 == 1 )
- G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp );
- else
- H__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- break;
- case( 'Q' ):
- if( clus1 == 1 && clus2 == 1 )
- G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp );
- else
- Q__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+ A__align( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, headgp, tailgp, -1, -1 );
break;
default:
fprintf( stderr, "alg = %c\n", alg );
-float Falign( char **seq1, char **seq2,
+double Falign( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx,
+ char **seq1, char **seq2,
double *eff1, double *eff2,
+ double **eff1s, double **eff2s,
int clus1, int clus2,
int alloclen, int *fftlog,
int *chudanpt, int chudanref, int *chudanres )
int count, count0;
int len1, len2;
int totallen;
- float totalscore;
- float dumfl = 0.0;
+ double totalscore;
+ double dumdb = 0.0;
int headgp, tailgp;
{
// fprintf( stderr, "Freeing localarrays in Falign\n" );
localalloclen = 0;
+ prevalloclen = 0;
+ crossscoresize = 0;
mymergesort( 0, 0, NULL );
alignableReagion( 0, 0, NULL, NULL, NULL, NULL, NULL );
fft( 0, NULL, 1 );
- A__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
- G__align11( NULL, NULL, 0, 0, 0 );
+ A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+ D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ A__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ D__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ G__align11( NULL, NULL, NULL, 0, 0, 0 );
blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL );
if( crossscore ) FreeDoubleMtx( crossscore );
- FreeCharMtx( result1 );
+ crossscore = NULL;
+ FreeCharMtx( result1 ); result1 = NULL;
FreeCharMtx( result2 );
FreeCharMtx( tmpres1 );
FreeCharMtx( tmpres2 );
+
#define CAND 0
#if CAND
fftfp = fopen( "cand", "w" );
totalscore += Aalign( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen );
break;
case( 'M' ):
- totalscore += MSalignmm( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
+ if( scoringmatrices ) // called by tditeration.c
+ totalscore += MSalignmm_variousdist( NULL, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
+ else
+ totalscore += MSalignmm( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
+// totalscore += MSalignmm( n_dis_consweight_multi, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
break;
- case( 'A' ):
+ case( 'd' ):
if( clus1 == 1 && clus2 == 1 )
{
- totalscore += G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp );
+ totalscore += G__align11( n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp );
}
else
- totalscore += A__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumfl, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
- break;
- case( 'H' ):
- if( clus1 == 1 && clus2 == 1 )
{
- totalscore += G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp );
+ if( scoringmatrices ) // called by tditeration.c
+ {
+ totalscore += D__align_variousdist( whichmtx, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, NULL, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
+ }
+ else
+ totalscore += D__align( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
}
- else
- totalscore += H__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumfl, sgap1, sgap2, egap1, egap2 );
break;
- case( 'Q' ):
+ case( 'A' ):
if( clus1 == 1 && clus2 == 1 )
{
- totalscore += G__align11( tmpres1, tmpres2, alloclen, headgp, tailgp );
+ totalscore += G__align11( n_dynamicmtx, tmpres1, tmpres2, alloclen, headgp, tailgp );
}
else
- totalscore += Q__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumfl, sgap1, sgap2, egap1, egap2 );
+ {
+ if( scoringmatrices ) // called by tditeration.c
+ {
+ totalscore += A__align_variousdist( whichmtx, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, NULL, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp );
+ }
+ else
+ totalscore += A__align( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, NULL, &dumdb, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres, headgp, tailgp, -1, -1 );
+ }
break;
default:
fprintf( stderr, "alg = %c\n", alg );
for( j=0; j<clus2; j++ ) strcat( result2[j], tmpres2[j] );
totallen += nlen;
#if 0
- fprintf( stderr, "i=%d", i );
+ fprintf( stderr, "$#####$$$$ i=%d", i );
fprintf( stderr, "%4d\n", totallen );
fprintf( stderr, "\n\n" );
for( j=0; j<clus1; j++ )
#if 0
for( j=0; j<clus1; j++ )
{
- fprintf( stderr, "%s\n", result1[j] );
+ fprintf( stderr, "in Falign, %s\n", result1[j] );
}
fprintf( stderr, "- - - - - - - - - - -\n" );
for( j=0; j<clus2; j++ )
{
- fprintf( stderr, "%s\n", result2[j] );
+ fprintf( stderr, "in Falign, %s\n", result2[j] );
}
#endif
return( totalscore );
/*
sakujo wo kentou (2010/10/05)
*/
-float Falign_udpari_long( char **seq1, char **seq2,
+double Falign_udpari_long(
+ int **whichmtx, double ***scoringmatrices,
+ double **n_dynamicmtx,
+ char **seq1, char **seq2,
double *eff1, double *eff2,
+ double **eff1s, double **eff2s,
int clus1, int clus2,
int alloclen, int *fftlog )
{
int count, count0;
int len1, len2;
int totallen;
- float totalscore;
+ double totalscore;
int nkouho = 0;
int headgp, tailgp;
-// float dumfl = 0.0;
+// double dumfl = 0.0;
+
+ if( seq1 == NULL )
+ {
+ if( result1 )
+ {
+// fprintf( stderr, "### Freeing localarrays in Falign\n" );
+ localalloclen = 0;
+ prevalloclen = 0;
+ crossscoresize = 0;
+ mymergesort( 0, 0, NULL );
+ alignableReagion( 0, 0, NULL, NULL, NULL, NULL, NULL );
+ fft( 0, NULL, 1 );
+ A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+ A__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ D__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ G__align11( NULL, NULL, NULL, 0, 0, 0 );
+ blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL );
+ if( crossscore ) FreeDoubleMtx( crossscore );
+ crossscore = NULL; // reallocate sareru kanousei ga arunode.
+ FreeCharMtx( result1 ); result1 = NULL;
+ FreeCharMtx( result2 );
+ FreeCharMtx( tmpres1 );
+ FreeCharMtx( tmpres2 );
+ FreeCharMtx( tmpseq1 );
+ FreeCharMtx( tmpseq2 );
+ free( sgap1 );
+ free( egap1 );
+ free( sgap2 );
+ free( egap2 );
+ free( kouho );
+ free( cut1 );
+ free( cut2 );
+ free( tmpptr1 );
+ free( tmpptr2 );
+ free( segment );
+ free( segment1 );
+ free( segment2 );
+ free( sortedseg1 );
+ free( sortedseg2 );
+ if( !kobetsubunkatsu )
+ {
+ FreeFukusosuuMtx ( seqVector1 );
+ FreeFukusosuuMtx ( seqVector2 );
+ FreeFukusosuuVec( naisekiNoWa );
+ FreeFukusosuuMtx( naiseki );
+ FreeDoubleVec( soukan );
+ }
+ }
+ else
+ {
+// fprintf( stderr, "Did not allocate localarrays in Falign\n" );
+ }
+
+ return( 0.0 );
+ }
switch( alg )
{
case( 'M' ):
- totalscore += MSalignmm( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, NULL, 0, NULL, headgp, tailgp );
+ if( scoringmatrices ) // called by tditeration.c
+ totalscore += MSalignmm_variousdist( NULL, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, NULL, 0, NULL, headgp, tailgp );
+ else
+ totalscore += MSalignmm( n_dynamicmtx, tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, sgap1, sgap2, egap1, egap2, NULL, 0, NULL, headgp, tailgp );
break;
default:
fprintf( stderr, "alg = %c\n", alg );
static TLS int n;
for( ; *seq; result++ )
{
- n = amino_n[(int)*seq++];
+ n = amino_n[(unsigned char)*seq++];
if( n < 20 && n >= 0 ) result->R += incr * score[n];
#if 0
fprintf( stderr, "n=%d, score=%f, inc=%f R=%f\n",n, score[n], incr * score[n], result->R );
int n;
for( i=0; *seq; i++ )
{
- n = amino_n[(int)*seq++];
+ n = amino_n[(unsigned char)*seq++];
if( n < n20or4or2 && n >= 0 ) result[n][i].R += incr;
}
}
}
-float Falign_localhom( char **seq1, char **seq2,
+double Falign_localhom( int **whichmtx, double ***scoringmatrices, double **n_dynamicmtx,
+ char **seq1, char **seq2,
double *eff1, double *eff2,
+ double **eff1s, double **eff2s,
int clus1, int clus2,
int alloclen,
- LocalHom ***localhom, float *totalimpmatch,
+ LocalHom ***localhom, double *totalimpmatch,
int *gapmap1, int *gapmap2,
int *chudanpt, int chudanref, int *chudanres )
{
int count, count0;
int len1, len2;
int totallen;
- float totalscore;
- float impmatch;
+ double totalscore;
+ double impmatch;
extern Fukusosuu *AllocateFukusosuuVec();
extern Fukusosuu **AllocateFukusosuuMtx();
{
// fprintf( stderr, "Freeing localarrays in Falign\n" );
localalloclen = 0;
+ crossscoresize = 0;
mymergesort( 0, 0, NULL );
alignableReagion( 0, 0, NULL, NULL, NULL, NULL, NULL );
fft( 0, NULL, 1 );
- A__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
- G__align11( NULL, NULL, 0, 0, 0 );
+// A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 ); // iru?
+ G__align11( NULL, NULL, NULL, 0, 0, 0 );
partA__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL );
+ partA__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL );
blockAlign2( NULL, NULL, NULL, NULL, NULL, NULL );
if( crossscore ) FreeDoubleMtx( crossscore );
FreeCharMtx( result1 );
case( 'a' ):
totalscore += Aalign( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen );
break;
- case( 'Q' ):
- totalscore += partQ__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2 );
- *totalimpmatch += impmatch;
-// fprintf( stderr, "*totalimpmatch in Falign_localhom = %f\n", *totalimpmatch );
- break;
case( 'A' ):
- totalscore += partA__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres );
+ if( scoringmatrices ) // called by tditeration.c
+ {
+ totalscore += partA__align_variousdist( whichmtx, scoringmatrices, NULL, tmpres1, tmpres2, eff1, eff2, eff1s, eff2s, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres );
+ }
+ else
+ totalscore += partA__align( tmpres1, tmpres2, eff1, eff2, clus1, clus2, alloclen, localhom, &impmatch, cut1[i], cut1[i+1]-1, cut2[i], cut2[i+1]-1, gapmap1, gapmap2, sgap1, sgap2, egap1, egap2, chudanpt, chudanref, chudanres );
*totalimpmatch += impmatch;
// fprintf( stderr, "*totalimpmatch in Falign_localhom = %f\n", *totalimpmatch );
#define XXXXXXX 0
#define USE_PENALTY_EX 1
-
#if 1
-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
+static void match_calc_mtx( double **mtx, double *match, char **s1, char **s2, int i1, int lgth2 )
{
char *seq2 = s2[0];
- int *intptr = amino_dis[(int)s1[0][i1]];
+ double *doubleptr = mtx[(unsigned char)s1[0][i1]];
while( lgth2-- )
- *match++ = intptr[(int)*seq2++];
-}
-static void match_calc_mtx( int mtx[0x80][0x80], float *match, char **s1, char **s2, int i1, int lgth2 )
-{
- char *seq2 = s2[0];
- int *intptr = mtx[(int)s1[0][i1]];
-
- while( lgth2-- )
- *match++ = intptr[(int)*seq2++];
+ *match++ = doubleptr[(unsigned char)*seq2++];
}
#else
-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
+static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 )
{
int j;
}
#endif
-static float Atracking( float *lasthorizontalw, float *lastverticalw,
+static double Atracking( double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
int **ijp,
- int tailgp )
+ int tailgp,
+ int *warpis, int *warpjs, int warpbase )
{
int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
// char gap[] = "-";
gap = newgapstr;
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
- float wm;
+ double wm;
#if 0
ijp[0][j] = -( j + 1 );
}
+// if( tailgp == 1 || ijp[lgth1][lgth2] >= warpbase )
if( tailgp == 1 )
;
else
*mseq2[0] = 0;
+
iin = lgth1; jin = lgth2;
limk = lgth1+lgth2 + 1;
for( k=0; k<limk; k++ )
{
- if( ijp[iin][jin] < 0 )
+ if( ijp[iin][jin] >= warpbase ) // back-pointer values >= warpbase are indices into the warp tables, offset by warpbase
+ {
+// fprintf( stderr, "WARP!\n" );
+ ifi = warpis[ijp[iin][jin]-warpbase]; // row coordinate stored for this warp entry
+ jfi = warpjs[ijp[iin][jin]-warpbase]; // column coordinate stored for this warp entry
+ }
+ else if( ijp[iin][jin] < 0 )
{
ifi = iin-1; jfi = jin+ijp[iin][jin];
}
{
ifi = iin-1; jfi = jin-1;
}
- l = iin - ifi;
- while( --l )
+
+ if( ifi == -warpbase && jfi == -warpbase ) // -warpbase is the value G__align11 pre-fills its warp coordinate arrays with
{
- *--mseq1[0] = seq1[0][ifi+l];
- *--mseq2[0] = *gap;
- k++;
+ l = iin; // emit seq1[0][iin-1 .. 0], each paired with a gap in mseq2
+ while( --l >= 0 )
+ {
+ *--mseq1[0] = seq1[0][l];
+ *--mseq2[0] = *gap;
+ k++;
+ }
+ l= jin; // then emit seq2[0][jin-1 .. 0], each paired with a gap in mseq1
+ while( --l >= 0 )
+ {
+ *--mseq1[0] = *gap;
+ *--mseq2[0] = seq2[0][l];
+ k++;
+ }
+ break; // both sequences are fully written out; leave the traceback loop
}
- l= jin - jfi;
- while( --l )
+ else
{
- *--mseq1[0] = *gap;
- *--mseq2[0] = seq2[0][jfi+l];
- k++;
+ l = iin - ifi;
+ while( --l > 0 )
+ {
+ *--mseq1[0] = seq1[0][ifi+l];
+ *--mseq2[0] = *gap;
+ k++;
+ }
+ l= jin - jfi;
+ while( --l > 0 )
+ {
+ *--mseq1[0] = *gap;
+ *--mseq2[0] = seq2[0][jfi+l];
+ k++;
+ }
}
if( iin <= 0 || jin <= 0 ) break;
*--mseq1[0] = seq1[0][ifi];
k++;
iin = ifi; jin = jfi;
}
+
+// fprintf( stderr, "%s\n", mseq1[0] );
+// fprintf( stderr, "%s\n", mseq2[0] );
return( 0.0 );
}
-float G__align11( char **seq1, char **seq2, int alloclen, int headgp, int tailgp )
-/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
+double G__align11( double **n_dynamicmtx, char **seq1, char **seq2, int alloclen, int headgp, int tailgp )
{
// int k;
register int i, j;
int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
+ int lastj;
int lgth1, lgth2;
int resultlen;
- float wm; /* int ?????? */
- float g;
- float *currentw, *previousw;
- float fpenalty = (float)penalty;
+ double wm; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+ double fpenalty = (double)penalty;
+ double fpenalty_shift = (double)penalty_shift;
+ double fpenalty_tmp;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
+ double fpenalty_ex = (double)penalty_ex;
#endif
#if 1
- float *wtmp;
+ double *wtmp;
int *ijppt;
- float *mjpt, *prept, *curpt;
+ double *mjpt, *prept, *curpt;
int *mpjpt;
#endif
- static TLS float mi, *m;
- static TLS int **ijp;
- static TLS int mpi, *mp;
- static TLS float *w1, *w2;
- static TLS float *match;
- static TLS float *initverticalw; /* kufuu sureba iranai */
- static TLS float *lastverticalw; /* kufuu sureba iranai */
- static TLS char **mseq1;
- static TLS char **mseq2;
- static TLS char **mseq;
- static TLS int **intwork;
- static TLS float **floatwork;
+ static TLS double mi = 0.0;
+ static TLS double *m = NULL;
+ static TLS int **ijp = NULL;
+ static TLS int mpi = 0;
+ static TLS int *mp = NULL;
+ static TLS double *w1 = NULL;
+ static TLS double *w2 = NULL;
+ static TLS double *match = NULL;
+ static TLS double *initverticalw = NULL; /* kufuu sureba iranai */
+ static TLS double *lastverticalw = NULL; /* kufuu sureba iranai */
+ static TLS char **mseq1 = NULL;
+ static TLS char **mseq2 = NULL;
+ static TLS char **mseq = NULL;
+ static TLS int **intwork = NULL;
+ static TLS double **doublework = NULL;
static TLS int orlgth1 = 0, orlgth2 = 0;
+ static TLS double **amino_dynamicmtx = NULL; // ??
+
+ int *warpis = NULL;
+ int *warpjs = NULL;
+ int *warpi = NULL;
+ int *warpj = NULL;
+ int *prevwarpi = NULL;
+ int *prevwarpj = NULL;
+ double *wmrecords = NULL;
+ double *prevwmrecords = NULL;
+ int warpn = 0;
+ int warpbase;
+ double curm = 0.0;
+ double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+ int *warpipt, *warpjpt;
+
if( seq1 == NULL )
{
{
orlgth1 = 0;
orlgth2 = 0;
- free( mseq1 );
- free( mseq2 );
- FreeFloatVec( w1 );
- FreeFloatVec( w2 );
- FreeFloatVec( match );
- FreeFloatVec( initverticalw );
- FreeFloatVec( lastverticalw );
+ if( mseq1 ) free( mseq1 ); mseq1 = NULL;
+ if( mseq2 ) free( mseq2 ); mseq2 = NULL;
+ if( w1 ) FreeFloatVec( w1 ); w1 = NULL;
+ if( w2 ) FreeFloatVec( w2 ); w2 = NULL;
+ if( match ) FreeFloatVec( match ); match = NULL;
+ if( initverticalw ) FreeFloatVec( initverticalw ); initverticalw = NULL;
+ if( lastverticalw ) FreeFloatVec( lastverticalw ); lastverticalw = NULL;
- FreeFloatVec( m );
- FreeIntVec( mp );
+ if( m ) FreeFloatVec( m ); m = NULL;
+ if( mp ) FreeIntVec( mp ); mp = NULL;
- FreeCharMtx( mseq );
+ if( mseq ) FreeCharMtx( mseq ); mseq = NULL;
- FreeFloatMtx( floatwork );
- FreeIntMtx( intwork );
+ if( doublework ) FreeFloatMtx( doublework ); doublework = NULL;
+ if( intwork ) FreeIntMtx( intwork ); intwork = NULL;
+
+ if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL;
}
+ orlgth1 = 0;
+ orlgth2 = 0;
return( 0.0 );
}
+
+
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
+ warpbase = lgth1 + lgth2;
+ warpis = NULL;
+ warpjs = NULL;
+ warpn = 0;
+ if( trywarp )
+ {
+// fprintf( stderr, "IN G__align11\n" );
+ if( headgp == 0 || tailgp == 0 )
+ {
+ fprintf( stderr, "At present, headgp and tailgp must be 1.\n" );
+ exit( 1 );
+ }
+
+// The empty-sequence branches further down return before the DP loop and
+// never free these buffers, so allocate the warp bookkeeping only when both
+// sequences are non-empty (otherwise the six vectors would leak).
+ if( lgth1 > 0 && lgth2 > 0 )
+ {
+ wmrecords = AllocateFloatVec( lgth2+1 );
+ warpi = AllocateIntVec( lgth2+1 );
+ warpj = AllocateIntVec( lgth2+1 );
+ prevwmrecords = AllocateFloatVec( lgth2+1 );
+ prevwarpi = AllocateIntVec( lgth2+1 );
+ prevwarpj = AllocateIntVec( lgth2+1 );
+// Scores start at 0.0; coordinates start at -warpbase, the value Atracking() tests for.
+ for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
+ }
+ }
+
+#if 0
if( lgth1 <= 0 || lgth2 <= 0 )
{
fprintf( stderr, "WARNING (g11): lgth1=%d, lgth2=%d\n", lgth1, lgth2 );
}
+#endif
#if 1
if( lgth1 == 0 && lgth2 == 0 )
if( lgth1 == 0 )
{
seq1[0][lgth2] = 0;
- while( lgth2 ) seq1[0][--lgth2] = '-';
+ while( lgth2 ) seq1[0][--lgth2] = *newgapstr;
// fprintf( stderr, "seq1[0] = %s\n", seq1[0] );
return( 0.0 );
}
if( lgth2 == 0 )
{
seq2[0][lgth1] = 0;
- while( lgth1 ) seq2[0][--lgth1] = '-';
+ while( lgth1 ) seq2[0][--lgth1] = *newgapstr;
// fprintf( stderr, "seq2[0] = %s\n", seq2[0] );
return( 0.0 );
}
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
+ FreeDoubleMtx( amino_dynamicmtx );
}
ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
mseq = AllocateCharMtx( njob, ll1+ll2 );
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 );
#if DEBUG
fprintf( stderr, "succeeded\n" );
orlgth1 = ll1 - 100;
orlgth2 = ll2 - 100;
}
+ for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )
+ amino_dynamicmtx[(unsigned char)amino[i]][(unsigned char)amino[j]] = (double)n_dynamicmtx[i][j];
mseq1[0] = mseq[0];
previousw = w2;
- match_calc( initverticalw, seq2, seq1, 0, lgth1 );
-
-
- match_calc( currentw, seq1, seq2, 0, lgth2 );
+ match_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 );
if( headgp == 1 )
{
lastverticalw[0] = currentw[lgth2-1]; // lgth2==0 no toki error
if( tailgp ) lasti = lgth1+1; else lasti = lgth1;
+ lastj = lgth2+1;
#if XXXXXXX
fprintf( stderr, "currentw = \n" );
previousw[0] = initverticalw[i-1];
- match_calc( currentw, seq1, seq2, i, lgth2 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );
#if XXXXXXX
fprintf( stderr, "\n" );
fprintf( stderr, "i=%d\n", i );
prept = previousw;
curpt = currentw + 1;
mpjpt = mp + 1;
- for( j=1; j<lgth2+1; j++ )
+ if( trywarp )
+ {
+ prevwmrecordspt = prevwmrecords;
+ wmrecordspt = wmrecords+1;
+ wmrecords1pt = wmrecords;
+ warpipt = warpi + 1;
+ warpjpt = warpj + 1;
+ }
+
+ for( j=1; j<lastj; j++ )
{
+
wm = *prept;
*ijppt = 0;
#if USE_PENALTY_EX
m[j] += fpenalty_ex;
#endif
-
-#if 0
- fprintf( stderr, "%5.0f ", wm );
+#if 1
+ if( trywarp )
+ {
+ fpenalty_tmp = fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] );
+// fprintf( stderr, "fpenalty_shift = %f\n", fpenalty_tmp );
+
+// fprintf( stderr, "\n\n\nwarp to %c-%c (%d-%d) from %c-%c (%d-%d) ? prevwmrecords[%d] = %f + %f <- wm = %f\n", seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], prevwarpi[j-1], prevwarpj[j-1], seq1[0][i], seq2[0][j], i, j, j, prevwmrecords[j-1], fpenalty_tmp, wm );
+// if( (g=prevwmrecords[j-1] + fpenalty_shift )> wm )
+ if( ( g=*prevwmrecordspt++ + fpenalty_tmp )> wm ) // naka ha osokute kamawanai
+ {
+// fprintf( stderr, "Yes! Warp!! from %d-%d (%c-%c) to %d-%d (%c-%c) fpenalty_tmp = %f! warpn = %d\n", i, j, seq1[0][i], seq2[0][j-1], prevwarpi[j-1], prevwarpj[j-1],seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], fpenalty_tmp, warpn );
+ if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+ {
+ *ijppt = warpbase + warpn - 1;
+ }
+ else
+ {
+ *ijppt = warpbase + warpn;
+ warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+ warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+ warpis[warpn] = prevwarpi[j-1];
+ warpjs[warpn] = prevwarpj[j-1];
+ warpn++;
+ }
+ wm = g;
+ }
+ else
+ {
+ }
+
+ curm = *curpt + wm;
+
+// fprintf( stderr, "###### curm = %f at %c-%c, i=%d, j=%d\n", curm, seq1[0][i], seq2[0][j], i, j );
+
+// fprintf( stderr, "copy from i, j-1? %f > %f?\n", wmrecords[j-1], curm );
+// if( wmrecords[j-1] > wmrecords[j] )
+ if( *wmrecords1pt > *wmrecordspt )
+ {
+// fprintf( stderr, "yes\n" );
+// wmrecords[j] = wmrecords[j-1];
+ *wmrecordspt = *wmrecords1pt;
+// warpi[j] = warpi[j-1];
+// warpj[j] = warpj[j-1];
+ *warpipt = *(warpipt-1);
+ *warpjpt = *(warpjpt-1);
+// fprintf( stderr, "warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] );
+ }
+// else
+// {
+// fprintf( stderr, "no\n" );
+// }
+
+// fprintf( stderr, " curm = %f at %c-%c\n", curm, seq1[0][i], seq2[0][j] );
+// fprintf( stderr, " wmrecords[%d] = %f\n", j, wmrecords[j] );
+// fprintf( stderr, "replace?\n" );
+
+// if( curm > wmrecords[j] )
+ if( curm > *wmrecordspt )
+ {
+// fprintf( stderr, "yes at %d-%d (%c-%c), replaced warp: warpi[j]=%d, warpj[j]=%d warpn=%d, wmrecords[j] = %f -> %f\n", i, j, seq1[0][i], seq2[0][j], i, j, warpn, wmrecords[j], curm );
+// wmrecords[j] = curm;
+ *wmrecordspt = curm;
+// warpi[j] = i;
+// warpj[j] = j;
+ *warpipt = i;
+ *warpjpt = j;
+ }
+// else
+// {
+// fprintf( stderr, "No! warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] );
+// }
+// fprintf( stderr, "%d-%d (%c-%c) curm = %5.0f, wmrecords[j]=%f\n", i, j, seq1[0][i], seq2[0][j], curm, wmrecords[j] );
+ wmrecordspt++;
+ wmrecords1pt++;
+ warpipt++;
+ warpjpt++;
+ }
#endif
+
*curpt++ += wm;
ijppt++;
mjpt++;
mpjpt++;
}
lastverticalw[i] = currentw[lgth2-1]; // lgth2==0 no toki error
+
+ if( trywarp )
+ {
+ fltncpy( prevwmrecords, wmrecords, lastj );
+ intncpy( prevwarpi, warpi, lastj );
+ intncpy( prevwarpj, warpj, lastj );
+ }
}
- Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, tailgp );
+ if( trywarp )
+ {
+// fprintf( stderr, "\nwm = %f\n", wm );
+// fprintf( stderr, "warpn = %d\n", warpn );
+ free( wmrecords );
+ free( prevwmrecords );
+ free( warpi );
+ free( warpj );
+ free( prevwarpi );
+ free( prevwarpj );
+ }
+
+ Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, tailgp, warpis, warpjs, warpbase );
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
+
resultlen = strlen( mseq1[0] );
if( alloclen < resultlen || resultlen > N )
return( wm );
}
-float G__align11_noalign( int scoremtx[0x80][0x80], int penal, int penal_ex, char **seq1, char **seq2, int alloclen )
-/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
+double G__align11_noalign( double **n_dynamicmtx, int penal, int penal_ex, char **seq1, char **seq2, int alloclen )
+/* warp mitaiou */
{
// int k;
register int i, j;
int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
int lgth1, lgth2;
// int resultlen;
- float wm; /* int ?????? */
- float g;
- float *currentw, *previousw;
- float fpenalty = (float)penal;
+ double wm; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+ double fpenalty = (double)penal;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penal_ex;
+ double fpenalty_ex = (double)penal_ex;
#endif
#if 1
- float *wtmp;
- float *mjpt, *prept, *curpt;
+ double *wtmp;
+ double *mjpt, *prept, *curpt;
// int *mpjpt;
#endif
- static TLS float mi, *m;
- static TLS float *w1, *w2;
- static TLS float *match;
- static TLS float *initverticalw; /* kufuu sureba iranai */
- static TLS float *lastverticalw; /* kufuu sureba iranai */
+ static TLS double mi, *m;
+ static TLS double *w1, *w2;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
static TLS int **intwork;
- static TLS float **floatwork;
+ static TLS double **doublework;
static TLS int orlgth1 = 0, orlgth2 = 0;
+ static TLS double **amino_dynamicmtx;
if( seq1 == NULL )
{
free( m );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
+ FreeDoubleMtx( amino_dynamicmtx );
}
return( 0.0 );
}
+
wm = 0.0;
+#if 0
if( lgth1 <= 0 || lgth2 <= 0 )
{
fprintf( stderr, "WARNING (g11): lgth1=%d, lgth2=%d\n", lgth1, lgth2 );
}
+#endif
if( lgth1 > orlgth1 || lgth2 > orlgth2 )
{
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
+
+ FreeDoubleMtx( amino_dynamicmtx );
}
ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
+
+ amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 ); // one row/column per unsigned char value: match_calc_mtx indexes with (unsigned char), so 0x80 was too small for bytes >= 0x80
#if DEBUG
fprintf( stderr, "succeeded\n" );
#endif
}
+ for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )
+ amino_dynamicmtx[(unsigned char)amino[i]][(unsigned char)amino[j]] = (double)n_dynamicmtx[i][j]; // same index conversion as the lookups in match_calc_mtx
previousw = w2;
- match_calc_mtx( scoremtx, initverticalw, seq2, seq1, 0, lgth1 );
+ match_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 );
- match_calc_mtx( scoremtx, currentw, seq1, seq2, 0, lgth2 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 );
if( 1 ) // tsuneni outgap-1
{
previousw[0] = initverticalw[i-1];
- match_calc_mtx( scoremtx, currentw, seq1, seq2, i, lgth2 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );
#if XXXXXXX
fprintf( stderr, "\n" );
fprintf( stderr, "i=%d\n", i );
#define DEFAULTOFS_J -123 /* +10 -- -50 teido ka ? */
#define DEFAULTPAMN 200
-void JTTmtx( double **rsr, double *freq, char locamino[26], char locgrp[26], int isTM )
+void JTTmtx( double **rsr, double *freq, unsigned char locamino[26], char locgrp[26], int isTM )
{
int i, j;
double r[20][20];
static TLS int localstop; // 060910
#if 1
-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
+static void match_calc_mtx( double **mtx, double *match, char **s1, char **s2, int i1, int lgth2 )
{
char *seq2 = s2[0];
- int *intptr;
+ double *doubleptr = mtx[(unsigned char)s1[0][i1]];
- intptr = amino_dis[(int)s1[0][i1]];
while( lgth2-- )
- *match++ = intptr[(int)*seq2++];
+ *match++ = doubleptr[(unsigned char)*seq2++];
}
#else
-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
+static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 )
{
int j;
#endif
#if 0
-static void match_calc_bk( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
+static void match_calc_bk( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
int j, k, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+ double scarr[nalphabets];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
int count = 0;
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[l][j] )
{
}
}
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( k=0; k<26; k++ )
+ for( k=0; k<nalphabets; k++ )
scarr[l] += n_dis[k][l] * cpmx1[k][i1];
}
-#if 0 /* ¤³¤ì¤ò»È¤¦¤È¤\ad¤Ïfloatwork¤Î¥¢¥í¥±¡¼¥È¤òµÕ¤Ë¤¹¤ë */
+#if 0 /* when using this branch, the allocation of doublework must be reversed */
{
- float *fpt, **fptpt, *fpt2;
+ double *fpt, **fptpt, *fpt2;
int *ipt, **iptpt;
fpt2 = match;
iptpt = cpmxpdn;
}
#endif
-static float Ltracking( float *lasthorizontalw, float *lastverticalw,
+static double Ltracking( double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- int **ijp, int *off1pt, int *off2pt, int endi, int endj )
+ int **ijp, int *off1pt, int *off2pt, int endi, int endj,
+ int *warpis, int *warpjs, int warpbase )
{
int i, j, l, iin, jin, lgth1, lgth2, k, limk;
int ifi=0, jfi=0; // by D.Mathog, a guess
limk = lgth1+lgth2;
for( k=0; k<=limk; k++ )
{
- if( ijp[iin][jin] < 0 )
+ if( ijp[iin][jin] >= warpbase )
+ {
+// fprintf( stderr, "WARP!\n" );
+ ifi = warpis[ijp[iin][jin]-warpbase];
+ jfi = warpjs[ijp[iin][jin]-warpbase];
+ }
+ else if( ijp[iin][jin] < 0 )
{
ifi = iin-1; jfi = jin+ijp[iin][jin];
}
{
ifi = iin-1; jfi = jin-1;
}
- l = iin - ifi;
- while( --l )
+
+
+#if 1 // sentou de warp?
+ if( ifi == -warpbase && jfi == -warpbase )
{
- *--mseq1[0] = seq1[0][ifi+l];
- *--mseq2[0] = *gap;
- k++;
+ l = iin;
+ while( --l >= 0 )
+ {
+ *--mseq1[0] = seq1[0][l];
+ *--mseq2[0] = *gap;
+ k++;
+ }
+ l= jin;
+ while( --l >= 0 )
+ {
+ *--mseq1[0] = *gap;
+ *--mseq2[0] = seq2[0][l];
+ k++;
+ }
+ break;
}
- l= jin - jfi;
- while( --l )
+ else
+#endif
{
- *--mseq1[0] = *gap;
- *--mseq2[0] = seq2[0][jfi+l];
- k++;
+ l = iin - ifi;
+ while( --l > 0 )
+ {
+ *--mseq1[0] = seq1[0][ifi+l];
+ *--mseq2[0] = *gap;
+ k++;
+ }
+ l= jin - jfi;
+ while( --l > 0 )
+ {
+ *--mseq1[0] = *gap;
+ *--mseq2[0] = seq2[0][jfi+l];
+ k++;
+ }
}
+
if( iin <= 0 || jin <= 0 ) break;
*--mseq1[0] = seq1[0][ifi];
*--mseq2[0] = seq2[0][jfi];
if( jfi == -1 ) *off2pt = 0; else *off2pt = jfi;
// fprintf( stderr, "ifn = %d, jfn = %d\n", ifi, jfi );
+// fprintf( stderr, "\n" );
+// fprintf( stderr, "%s\n", mseq1[0] );
+// fprintf( stderr, "%s\n", mseq2[0] );
return( 0.0 );
}
-float L__align11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt )
+double L__align11( double **n_dynamicmtx, double scoreoffset, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
- register int i, j;
+ int i, j;
int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
int lgth1, lgth2;
int resultlen;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
#if 1
- float *wtmp;
+ double *wtmp;
int *ijppt;
- float *mjpt, *prept, *curpt;
+ double *mjpt, *prept, *curpt;
int *mpjpt;
#endif
- static TLS float mi, *m;
+ static TLS double mi, *m;
static TLS int **ijp;
static TLS int mpi, *mp;
- static TLS float *w1, *w2;
- static TLS float *match;
- static TLS float *initverticalw; /* kufuu sureba iranai */
- static TLS float *lastverticalw; /* kufuu sureba iranai */
+ static TLS double *w1, *w2;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
static TLS char **mseq1;
static TLS char **mseq2;
static TLS char **mseq;
// static TLS int **intwork;
-// static TLS float **floatwork;
+// static TLS double **doublework;
static TLS int orlgth1 = 0, orlgth2 = 0;
- float maxwm;
+ static TLS double **amino_dynamicmtx = NULL; // ??
+ double maxwm;
int endali = 0, endalj = 0; // by D.Mathog, a guess
// int endali, endalj;
- float localthr = -offset;
- float localthr2 = -offset;
-// float localthr = 100;
-// float localthr2 = 100;
- float fpenalty = (float)penalty;
- float fpenalty_ex = (float)penalty_ex;
+ double localthr = -offset + scoreoffset * 600; // 2013/12/13
+ double localthr2 = -offset + scoreoffset * 600; // 2013/12/13
+// double localthr = -offset;
+// double localthr2 = -offset;
+ double fpenalty = (double)penalty;
+ double fpenalty_ex = (double)penalty_ex;
+ double fpenalty_shift = (double)penalty_shift;
+ double fpenalty_tmp; // atode kesu
+
+ int *warpis = NULL;
+ int *warpjs = NULL;
+ int *warpi = NULL;
+ int *warpj = NULL;
+ int *prevwarpi = NULL;
+ int *prevwarpj = NULL;
+ double *wmrecords = NULL;
+ double *prevwmrecords = NULL;
+ int warpn = 0;
+ int warpbase;
+ double curm = 0.0;
+ double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+ int *warpipt, *warpjpt;
+
+
if( seq1 == NULL )
{
FreeIntVec( mp );
FreeCharMtx( mseq );
+ if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL;
}
return( 0.0 );
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
+
+ warpbase = lgth1 + lgth2;
+ warpis = NULL;
+ warpjs = NULL;
+ warpn = 0;
+ if( trywarp )
+ {
+ wmrecords = AllocateFloatVec( lgth2+1 );
+ warpi = AllocateIntVec( lgth2+1 );
+ warpj = AllocateIntVec( lgth2+1 );
+ prevwmrecords = AllocateFloatVec( lgth2+1 );
+ prevwarpi = AllocateIntVec( lgth2+1 );
+ prevwarpj = AllocateIntVec( lgth2+1 );
+ for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
+ }
+
+
if( lgth1 > orlgth1 || lgth2 > orlgth2 )
{
int ll1, ll2;
FreeIntVec( mp );
FreeCharMtx( mseq );
+ if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL;
-
-// FreeFloatMtx( floatwork );
+// FreeFloatMtx( doublework );
// FreeIntMtx( intwork );
}
mseq = AllocateCharMtx( njob, ll1+ll2 );
-// floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
-// intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+// doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+// intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
#if DEBUG
fprintf( stderr, "succeeded\n" );
#endif
-
+ amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 );
orlgth1 = ll1 - 100;
orlgth2 = ll2 - 100;
}
+ for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )
+ amino_dynamicmtx[(unsigned char)amino[i]][(unsigned char)amino[j]] = (double)n_dynamicmtx[i][j]; // index exactly as match_calc_mtx reads it; an (int) cast of a signed char >= 0x80 would give a negative index
+
mseq1[0] = mseq[0];
mseq2[0] = mseq[1];
currentw = w1;
previousw = w2;
- match_calc( initverticalw, seq2, seq1, 0, lgth1 );
+ match_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 );
- match_calc( currentw, seq1, seq2, 0, lgth2 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 );
lasti = lgth2+1;
previousw[0] = initverticalw[i-1];
- match_calc( currentw, seq1, seq2, i, lgth2 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );
#if DEBUG2
fprintf( stderr, "%c ", seq1[0][i] );
fprintf( stderr, "%5.0f ", currentw[0] );
curpt = currentw + 1;
mpjpt = mp + 1;
lastj = lgth2+1;
+
+ if( trywarp )
+ {
+ prevwmrecordspt = prevwmrecords;
+ wmrecordspt = wmrecords+1;
+ wmrecords1pt = wmrecords;
+ warpipt = warpi + 1;
+ warpjpt = warpj + 1;
+ }
for( j=1; j<lastj; j++ )
{
wm = *prept;
#if 1
if( wm < localthr )
{
-// fprintf( stderr, "stop i=%d, j=%d, curpt=%f\n", i, j, *curpt );
+// fprintf( stderr, "stop i=%d, j=%d, curpt=%f, localthr = %f\n", i, j, *curpt, localthr );
*ijppt = localstop;
wm = localthr2;
}
#if 0
fprintf( stderr, "%5.0f ", *curpt );
#endif
-#if DEBUG2
- fprintf( stderr, "%5.0f ", wm );
+#if 0
+ fprintf( stderr, "wm (%d,%d) = %5.0f\n", i, j, wm );
// fprintf( stderr, "%c-%c *ijppt = %d, localstop = %d\n", seq1[0][i], seq2[0][j], *ijppt, localstop );
#endif
+ if( trywarp )
+ {
+ fpenalty_tmp = fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] );
+// fprintf( stderr, "fpenalty_shift = %f\n", fpenalty_tmp );
+
+// fprintf( stderr, "\n\n\nwarp to %c-%c (%d-%d) from %c-%c (%d-%d) ? prevwmrecords[%d] = %f + %f <- wm = %f\n", seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], prevwarpi[j-1], prevwarpj[j-1], seq1[0][i], seq2[0][j], i, j, j, prevwmrecords[j-1], fpenalty_tmp, wm );
+// if( (g=prevwmrecords[j-1] + fpenalty_shift )> wm )
+ if( ( g=*prevwmrecordspt++ + fpenalty_tmp )> wm ) // naka ha osokute kamawanai
+ {
+// fprintf( stderr, "Yes! Warp!! from %d-%d (%c-%c) to %d-%d (%c-%c) fpenalty_tmp = %f! warpn = %d\n", i, j, seq1[0][i], seq2[0][j-1], prevwarpi[j-1], prevwarpj[j-1],seq1[0][prevwarpi[j-1]], seq2[0][prevwarpj[j-1]], fpenalty_tmp, warpn );
+ if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+ {
+ *ijppt = warpbase + warpn - 1;
+ }
+ else
+ {
+ *ijppt = warpbase + warpn;
+ warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+ warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+ warpis[warpn] = prevwarpi[j-1];
+ warpjs[warpn] = prevwarpj[j-1];
+ warpn++;
+ }
+ wm = g;
+ }
+ else
+ {
+ }
+
+ curm = *curpt + wm;
+
+// fprintf( stderr, "###### curm = %f at %c-%c, i=%d, j=%d\n", curm, seq1[0][i], seq2[0][j], i, j );
+
+// fprintf( stderr, "copy from i, j-1? %f > %f?\n", wmrecords[j-1], curm );
+// if( wmrecords[j-1] > wmrecords[j] )
+ if( *wmrecords1pt > *wmrecordspt )
+ {
+// fprintf( stderr, "yes\n" );
+// wmrecords[j] = wmrecords[j-1];
+ *wmrecordspt = *wmrecords1pt;
+// warpi[j] = warpi[j-1];
+// warpj[j] = warpj[j-1];
+ *warpipt = *(warpipt-1);
+ *warpjpt = *(warpjpt-1);
+// fprintf( stderr, "warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] );
+ }
+// else
+// {
+// fprintf( stderr, "no\n" );
+// }
+
+// fprintf( stderr, " curm = %f at %c-%c\n", curm, seq1[0][i], seq2[0][j] );
+// fprintf( stderr, " wmrecords[%d] = %f\n", j, wmrecords[j] );
+// fprintf( stderr, "replace?\n" );
+
+// if( curm > wmrecords[j] )
+ if( curm > *wmrecordspt )
+ {
+// fprintf( stderr, "yes at %d-%d (%c-%c), replaced warp: warpi[j]=%d, warpj[j]=%d warpn=%d, wmrecords[j] = %f -> %f\n", i, j, seq1[0][i], seq2[0][j], i, j, warpn, wmrecords[j], curm );
+// wmrecords[j] = curm;
+ *wmrecordspt = curm;
+// warpi[j] = i;
+// warpj[j] = j;
+ *warpipt = i;
+ *warpjpt = j;
+ }
+// else
+// {
+// fprintf( stderr, "No! warpi[j]=%d, warpj[j]=%d wmrecords[j] = %f\n", warpi[j], warpj[j], wmrecords[j] );
+// }
+// fprintf( stderr, "%d-%d (%c-%c) curm = %5.0f, wmrecords[j]=%f\n", i, j, seq1[0][i], seq2[0][j], curm, wmrecords[j] );
+ wmrecordspt++;
+ wmrecords1pt++;
+ warpipt++;
+ warpjpt++;
+ }
*curpt++ += wm;
ijppt++;
#endif
lastverticalw[i] = currentw[lgth2-1];
- }
+ if( trywarp )
+ {
+ fltncpy( prevwmrecords, wmrecords, lastj );
+ intncpy( prevwarpi, warpi, lastj );
+ intncpy( prevwarpj, warpj, lastj );
+ }
+ }
+// fprintf( stderr, "\nwm = %f\n", wm );
+ if( trywarp )
+ {
+// if( warpn ) fprintf( stderr, "warpn = %d\n", warpn );
+ free( wmrecords );
+ free( prevwmrecords );
+ free( warpi );
+ free( warpj );
+ free( prevwarpi );
+ free( prevwarpj );
+ }
#if 0
fprintf( stderr, "maxwm = %f\n", maxwm );
strcpy( seq1[0], "" );
strcpy( seq2[0], "" );
*off1pt = *off2pt = 0;
+// warpis/warpjs are otherwise released only after Ltracking(); free them on
+// this early exit too so the realloc'ed warp tables do not leak.
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
+ fprintf( stderr, "maxwm <- 0.0 \n" );
return( 0.0 );
}
- Ltracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, off1pt, off2pt, endali, endalj );
+ Ltracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, off1pt, off2pt, endali, endalj, warpis, warpjs, warpbase );
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
resultlen = strlen( mseq1[0] );
fprintf( stderr, ">\n%s\n", mseq1[0] );
fprintf( stderr, ">\n%s\n", mseq2[0] );
+ fprintf( stderr, "*off1pt = %d, *off2pt = %d\n", *off1pt, *off2pt );
+
fprintf( stderr, "maxwm = %f\n", maxwm );
fprintf( stderr, " wm = %f\n", wm );
#endif
return( maxwm );
}
+
+/*
+ * L__align11_noalign -- score-only variant of the pairwise local DP.
+ *
+ * Runs the dynamic-programming recursion over seq1[0] x seq2[0] using the
+ * substitution scores in n_dynamicmtx and the gap parameters penalty,
+ * penalty_ex and offset, which are declared outside this function.  No
+ * traceback is performed: no alignment strings are built and this function
+ * itself never writes to seq1 or seq2.  Warp is not supported here.
+ *
+ * Parameters
+ *   n_dynamicmtx  score matrix indexed [0..nalphabets) x [0..nalphabets);
+ *                 on every call it is expanded into a 0x80 x 0x80 table
+ *                 indexed by the character codes stored in amino[].
+ *   seq1, seq2    only element [0] of each is read.  Passing seq1 == NULL
+ *                 releases the static work buffers and returns 0.0.
+ *
+ * Returns the largest cell score seen (maxwm), or 0.0 when that maximum is
+ * below the local threshold (-offset) or when either sequence is empty.
+ *
+ * The work buffers are declared static TLS, are kept between calls and are
+ * regrown whenever a longer sequence arrives.
+ */
+double L__align11_noalign( double **n_dynamicmtx, char **seq1, char **seq2 )
+{
+	int i, j;
+	int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
+	int lgth1, lgth2;
+	double wm = 0.0;
+	double g;
+	double *currentw, *previousw;
+	double *wtmp;
+	double *mjpt, *prept, *curpt;
+	static TLS double mi, *m;
+	static TLS double *w1, *w2;
+	static TLS double *match;         /* allocated and freed with the others, not read here */
+	static TLS double *initverticalw; /* could be eliminated with some rework */
+	static TLS double *lastverticalw; /* written below but not read here */
+	static TLS int orlgth1 = 0, orlgth2 = 0;
+	static TLS double **amino_dynamicmtx = NULL;
+	double maxwm;
+	double localthr = -offset;
+	double localthr2 = -offset;
+	double fpenalty = (double)penalty;
+	double fpenalty_ex = (double)penalty_ex;
+
+	/* Cleanup request: release everything that a previous call allocated. */
+	if( seq1 == NULL )
+	{
+		if( orlgth1 > 0 && orlgth2 > 0 )
+		{
+			orlgth1 = 0;
+			orlgth2 = 0;
+			FreeFloatVec( w1 );
+			FreeFloatVec( w2 );
+			FreeFloatVec( match );
+			FreeFloatVec( initverticalw );
+			FreeFloatVec( lastverticalw );
+
+			FreeFloatVec( m );
+
+			if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx );
+			amino_dynamicmtx = NULL;
+		}
+		return( 0.0 );
+	}
+
+	lgth1 = strlen( seq1[0] );
+	lgth2 = strlen( seq2[0] );
+
+	/*
+	 * Empty input.  Without this guard:
+	 *  - two empty sequences on the first call skip the allocation below,
+	 *    so amino_dynamicmtx (still NULL) would be dereferenced;
+	 *  - lgth2 == 0 makes currentw[lgth2-1] read index -1;
+	 *  - a zero length can leave orlgth1/orlgth2 at 0 after an allocation,
+	 *    so the buffers would never be freed again.
+	 * No cell can be scored, so report the same result and message as the
+	 * "below threshold" exit at the end of the function.
+	 */
+	if( lgth1 == 0 || lgth2 == 0 )
+	{
+		localstop = lgth1+lgth2+1; /* same value the normal path stores */
+		fprintf( stderr, "maxwm <- 0.0 \n" );
+		return( 0.0 );
+	}
+
+	/* (Re)allocate the work buffers when either sequence outgrew them. */
+	if( lgth1 > orlgth1 || lgth2 > orlgth2 )
+	{
+		int ll1, ll2;
+
+		if( orlgth1 > 0 && orlgth2 > 0 )
+		{
+			FreeFloatVec( w1 );
+			FreeFloatVec( w2 );
+			FreeFloatVec( match );
+			FreeFloatVec( initverticalw );
+			FreeFloatVec( lastverticalw );
+
+			FreeFloatVec( m );
+
+			if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx );
+			amino_dynamicmtx = NULL;
+		}
+
+		/* 30% head-room plus a constant, never shrinking below the old size */
+		ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
+		ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
+
+#if DEBUG
+		fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
+#endif
+
+		w1 = AllocateFloatVec( ll2+2 );
+		w2 = AllocateFloatVec( ll2+2 );
+		match = AllocateFloatVec( ll2+2 );
+
+		initverticalw = AllocateFloatVec( ll1+2 );
+		lastverticalw = AllocateFloatVec( ll1+2 );
+
+		m = AllocateFloatVec( ll2+2 );
+
+#if DEBUG
+		fprintf( stderr, "succeeded\n" );
+#endif
+		amino_dynamicmtx = AllocateDoubleMtx( 0x80, 0x80 );
+		orlgth1 = ll1 - 100;
+		orlgth2 = ll2 - 100;
+	}
+
+	/* Re-index the score matrix by character code for match_calc_mtx. */
+	for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )
+		amino_dynamicmtx[(int)amino[i]][(int)amino[j]] = (double)n_dynamicmtx[i][j];
+
+	currentw = w1;
+	previousw = w2;
+
+	/* First column (seq2[0][0] against all of seq1) and first row. */
+	match_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 );
+
+	match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 );
+
+	/* m[j] holds the best score seen so far in column j-1. */
+	lastj = lgth2+1;
+	for( j=1; j<lastj; ++j )
+	{
+		m[j] = currentw[j-1];
+	}
+
+	lastverticalw[0] = currentw[lgth2-1];
+
+	lasti = lgth1+1;
+
+#if DEBUG2
+	fprintf( stderr, "\n" );
+	fprintf( stderr, " " );
+	for( j=0; j<lgth2; j++ )
+		fprintf( stderr, "%c ", seq2[0][j] );
+	fprintf( stderr, "\n" );
+#endif
+
+	localstop = lgth1+lgth2+1;
+	maxwm = -999999999.9;
+#if DEBUG2
+	fprintf( stderr, "\n" );
+	fprintf( stderr, "%c ", seq1[0][0] );
+
+	for( j=0; j<lgth2+1; j++ )
+		fprintf( stderr, "%5.0f ", currentw[j] );
+	fprintf( stderr, "\n" );
+#endif
+
+	for( i=1; i<lasti; i++ )
+	{
+		/* swap the two row buffers */
+		wtmp = previousw;
+		previousw = currentw;
+		currentw = wtmp;
+
+		previousw[0] = initverticalw[i-1];
+
+		match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );
+#if DEBUG2
+		fprintf( stderr, "%c ", seq1[0][i] );
+		fprintf( stderr, "%5.0f ", currentw[0] );
+#endif
+
+		currentw[0] = initverticalw[i];
+
+		/* mi: best score seen so far along the previous row */
+		mi = previousw[0];
+
+		mjpt = m + 1;
+		prept = previousw;
+		curpt = currentw + 1;
+		lastj = lgth2+1;
+		for( j=1; j<lastj; j++ )
+		{
+			/* diagonal predecessor */
+			wm = *prept;
+
+			/* gap reached through the previous row */
+			if( (g=mi+fpenalty) > wm )
+			{
+				wm = g;
+			}
+			if( *prept > mi )
+			{
+				mi = *prept;
+			}
+
+#if USE_PENALTY_EX
+			mi += fpenalty_ex;
+#endif
+
+			/* gap reached through this column */
+			if( (g=*mjpt+fpenalty) > wm )
+			{
+				wm = g;
+			}
+			if( *prept > *mjpt )
+			{
+				*mjpt = *prept;
+			}
+#if USE_PENALTY_EX
+			*mjpt += fpenalty_ex;
+#endif
+
+			if( maxwm < wm )
+			{
+				maxwm = wm;
+			}
+
+			/* local alignment: clamp paths that fell below the threshold */
+			if( wm < localthr )
+			{
+				wm = localthr2;
+			}
+#if DEBUG2
+			fprintf( stderr, "%5.0f ", wm );
+#endif
+
+			/* currentw already holds the match score for this cell */
+			*curpt++ += wm;
+			mjpt++;
+			prept++;
+		}
+#if DEBUG2
+		fprintf( stderr, "\n" );
+#endif
+
+		lastverticalw[i] = currentw[lgth2-1];
+	}
+
+	/*
+	 * The traceback-based "did we stop at a localstop cell" test of the
+	 * aligning variant may be needed here as well (original note:
+	 * "IRUKAMO"); for now only the threshold is checked.
+	 */
+	if( maxwm < localthr )
+	{
+		fprintf( stderr, "maxwm <- 0.0 \n" );
+		return( 0.0 );
+	}
+
+	return( maxwm );
+}
static int reccycle = 0;
-static float localthr;
+static double localthr;
-static void match_ribosum( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
+static void match_ribosum( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
int j, k, l;
- float scarr[38];
- float **cpmxpd = floatwork;
+ double scarr[38];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
int count = 0;
- float *matchpt;
- float **cpmxpdpt;
+ double *matchpt;
+ double **cpmxpdpt;
int **cpmxpdnpt;
int cpkd;
scarr[l] += ribosumdis[k][l] * cpmx1[i1][k];
}
}
-#if 0 /* ¤³¤ì¤ò»È¤¦¤È¤\ad¤Ïfloatwork¤Î¥¢¥í¥±¡¼¥È¤òµÕ¤Ë¤¹¤ë */
+#if 0 /* ¤³¤ì¤ò»È¤¦¤È¤\ad¤Ïdoublework¤Î¥¢¥í¥±¡¼¥È¤òµÕ¤Ë¤¹¤ë */
{
- float *fpt, **fptpt, *fpt2;
+ double *fpt, **fptpt, *fpt2;
int *ipt, **iptpt;
fpt2 = match;
iptpt = cpmxpdn;
#endif
}
-static void match_calc( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
+static void match_calc( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
int j, k, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+// double scarr[26];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
int count = 0;
- float *matchpt;
- float **cpmxpdpt;
+ double *matchpt;
+ double **cpmxpdpt;
int **cpmxpdnpt;
int cpkd;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
if( initialize )
{
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[j][l] )
{
}
}
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( k=0; k<26; k++ )
+ for( k=0; k<nalphabets; k++ )
{
scarr[l] += (n_dis[k][l]-RNAthr) * cpmx1[i1][k];
}
}
-#if 0 /* ¤³¤ì¤ò»È¤¦¤È¤\ad¤Ïfloatwork¤Î¥¢¥í¥±¡¼¥È¤òµÕ¤Ë¤¹¤ë */
+#if 0 /* ¤³¤ì¤ò»È¤¦¤È¤\ad¤Ïdoublework¤Î¥¢¥í¥±¡¼¥È¤òµÕ¤Ë¤¹¤ë */
{
- float *fpt, **fptpt, *fpt2;
+ double *fpt, **fptpt, *fpt2;
int *ipt, **iptpt;
fpt2 = match;
iptpt = cpmxpdn;
cpmxpdpt++;
}
#endif
+ free( scarr );
}
#if 0
-static void match_add( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
+static void match_add( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
int j, k, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+ double scarr[nalphabets];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
int count = 0;
- float *matchpt;
- float **cpmxpdpt;
+ double *matchpt;
+ double **cpmxpdpt;
int **cpmxpdnpt;
int cpkd;
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[j][l] )
{
}
}
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( k=0; k<26; k++ )
+ for( k=0; k<nalphabets; k++ )
{
scarr[l] += n_dis[k][l] * cpmx1[i1][k];
}
}
-#if 0 /* ¤³¤ì¤ò»È¤¦¤È¤\ad¤Ïfloatwork¤Î¥¢¥í¥±¡¼¥È¤òµÕ¤Ë¤¹¤ë */
+#if 0 /* ¤³¤ì¤ò»È¤¦¤È¤\ad¤Ïdoublework¤Î¥¢¥í¥±¡¼¥È¤òµÕ¤Ë¤¹¤ë */
{
- float *fpt, **fptpt, *fpt2;
+ double *fpt, **fptpt, *fpt2;
int *ipt, **iptpt;
fpt2 = match;
iptpt = cpmxpdn;
#endif
#if 0
-static float Atracking(
+static double Atracking(
char **seq1, char **seq2,
char **mseq1, char **mseq2,
int **ijp, int icyc, int jcyc,
#endif
-static float MSalign2m2m_rec( int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, float **cpmx1, float **cpmx2, int ist, int ien, int jst, int jen, int alloclen, char **mseq1, char **mseq2, int depth, float **gapinfo, float **map )
+static double MSalign2m2m_rec( int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, double **cpmx1, double **cpmx2, int ist, int ien, int jst, int jen, int alloclen, char **mseq1, char **mseq2, int depth, double **gapinfo, double **map )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
- float value = 0.0;
+ double value = 0.0;
register int i, j;
char **aseq1, **aseq2;
int ll1, ll2;
int lasti, lastj, imid, jmid = 0;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
+ double fpenalty_ex = (double)penalty_ex;
#endif
-// float fpenalty = (float)penalty;
- float *wtmp;
+// double fpenalty = (double)penalty;
+ double *wtmp;
// short *ijppt;
int *mpjpt;
// short **ijp;
int *mp;
int mpi;
- float *mjpt, *prept, *curpt;
- float mi;
- float *m;
- float *w1, *w2;
-// float *match;
- float *initverticalw; /* kufuu sureba iranai */
- float *lastverticalw; /* kufuu sureba iranai */
+ double *mjpt, *prept, *curpt;
+ double mi;
+ double *m;
+ double *w1, *w2;
+// double *match;
+ double *initverticalw; /* kufuu sureba iranai */
+ double *lastverticalw; /* kufuu sureba iranai */
int **intwork;
- float **floatwork;
+ double **doublework;
// short **shortmtx;
#if STOREWM
- float **WMMTX;
- float **WMMTX2;
+ double **WMMTX;
+ double **WMMTX2;
#endif
- float *midw;
- float *midm;
- float *midn;
+ double *midw;
+ double *midm;
+ double *midn;
int lgth1, lgth2;
- float maxwm = 0.0;
+ double maxwm = 0.0;
int *jumpforwi;
int *jumpforwj;
int *jumpbacki;
int jumpi, jumpj = 0;
char *gaps;
int ijpi, ijpj;
- float *ogcp1;
- float *fgcp1;
- float *ogcp2;
- float *fgcp2;
- float firstm;
+ double *ogcp1;
+ double *fgcp1;
+ double *ogcp2;
+ double *fgcp2;
+ double firstm;
int firstmp;
#if 0
static char ttt1[50000];
mp = AllocateIntVec( ll2+2 );
gaps = AllocateCharVec( MAX( ll1, ll2 ) + 2 );
- floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, 26 );
- intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, 26 );
+ doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets );
#if DEBUG
fprintf( stderr, "succeeded\n" );
currentw = w1;
previousw = w2;
- match_ribosum( initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, floatwork, intwork, 1 );
+ match_ribosum( initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, doublework, intwork, 1 );
- match_ribosum( currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, floatwork, intwork, 1 );
+ match_ribosum( currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, doublework, intwork, 1 );
for( i=1; i<lgth1+1; i++ )
{
previousw[0] = initverticalw[i-1];
- match_ribosum( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 );
+ match_ribosum( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
currentw[0] = initverticalw[i];
m[0] = ogcp1[i];
// gyakudp
- match_ribosum( initverticalw, cpmx2+jst, cpmx1+ist, lgth2-1, lgth1, floatwork, intwork, 1 );
- match_ribosum( currentw, cpmx1+ist, cpmx2+jst, lgth1-1, lgth2, floatwork, intwork, 1 );
+ match_ribosum( initverticalw, cpmx2+jst, cpmx1+ist, lgth2-1, lgth1, doublework, intwork, 1 );
+ match_ribosum( currentw, cpmx1+ist, cpmx2+jst, lgth1-1, lgth2, doublework, intwork, 1 );
for( i=0; i<lgth1-1; i++ )
{
currentw = wtmp;
previousw[lgth2-1] = initverticalw[i+1];
// match_calc( currentw, seq1, seq2, i, lgth2 );
- match_ribosum( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 );
+ match_ribosum( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
currentw[lgth2-1] = initverticalw[i];
#if 0
for( i=0; i<lgth1; i++ )
{
- float maxpairscore = -9999.9;
- float tmpscore;
+ double maxpairscore = -9999.9;
+ double tmpscore;
for( j=0; j<lgth2; j++ )
{
}
for( j=0; j<lgth2; j++ )
{
- float maxpairscore = -9999.9;
- float tmpscore;
+ double maxpairscore = -9999.9;
+ double tmpscore;
for( i=0; i<lgth1; i++ )
{
FreeFloatVec( m );
FreeIntVec( mp );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
#if STOREWM
return( value );
}
-static float MSalignmm_rec( int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, float **cpmx1, float **cpmx2, int ist, int ien, int jst, int jen, int alloclen, char **mseq1, char **mseq2, int depth, float **gapinfo, float **map )
+static double MSalignmm_rec( int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, double **cpmx1, double **cpmx2, int ist, int ien, int jst, int jen, int alloclen, char **mseq1, char **mseq2, int depth, double **gapinfo, double **map )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
- int alnlen;
- float value = 0.0;
+ double value = 0.0;
register int i, j;
char **aseq1, **aseq2;
- int ll1, ll2, l, len;
+ int ll1, ll2;
int lasti, lastj, imid, jmid=0;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)RNApenalty_ex;
+ double fpenalty_ex = (double)RNApenalty_ex;
#endif
-// float fpenalty = (float)penalty;
- float *wtmp;
+// double fpenalty = (double)penalty;
+ double *wtmp;
// short *ijppt;
int *mpjpt;
// short **ijp;
int *mp;
int mpi;
- float *mjpt, *prept, *curpt;
- float mi;
- float *m;
- float *w1, *w2;
-// float *match;
- float *initverticalw; /* kufuu sureba iranai */
- float *lastverticalw; /* kufuu sureba iranai */
+ double *mjpt, *prept, *curpt;
+ double mi;
+ double *m;
+ double *w1, *w2;
+// double *match;
+ double *initverticalw; /* kufuu sureba iranai */
+ double *lastverticalw; /* kufuu sureba iranai */
int **intwork;
- float **floatwork;
+ double **doublework;
// short **shortmtx;
#if STOREWM
- float **WMMTX;
- float **WMMTX2;
+ double **WMMTX;
+ double **WMMTX2;
#endif
- float *midw;
- float *midm;
- float *midn;
+ double *midw;
+ double *midm;
+ double *midn;
int lgth1, lgth2;
- float maxwm = 0.0;
+ double maxwm = 0.0;
int *jumpforwi;
int *jumpforwj;
int *jumpbacki;
int jumpi, jumpj = 0;
char *gaps;
int ijpi, ijpj;
- float *ogcp1;
- float *fgcp1;
- float *ogcp2;
- float *fgcp2;
- float firstm;
+ double *ogcp1;
+ double *fgcp1;
+ double *ogcp2;
+ double *fgcp2;
+ double firstm;
int firstmp;
#if 0
static char ttt1[50000];
mp = AllocateIntVec( ll2+2 );
gaps = AllocateCharVec( MAX( ll1, ll2 ) + 2 );
- floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, 26 );
- intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, 26 );
+ doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets );
#if DEBUG
fprintf( stderr, "succeeded\n" );
currentw = w1;
previousw = w2;
- match_calc( initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, floatwork, intwork, 1 );
+ match_calc( initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, doublework, intwork, 1 );
- match_calc( currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, floatwork, intwork, 1 );
+ match_calc( currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, doublework, intwork, 1 );
for( i=1; i<lgth1+1; i++ )
{
previousw[0] = initverticalw[i-1];
- match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 );
+ match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
currentw[0] = initverticalw[i];
m[0] = ogcp1[i];
// gyakudp
- match_calc( initverticalw, cpmx2+jst, cpmx1+ist, lgth2-1, lgth1, floatwork, intwork, 1 );
- match_calc( currentw, cpmx1+ist, cpmx2+jst, lgth1-1, lgth2, floatwork, intwork, 1 );
+ match_calc( initverticalw, cpmx2+jst, cpmx1+ist, lgth2-1, lgth1, doublework, intwork, 1 );
+ match_calc( currentw, cpmx1+ist, cpmx2+jst, lgth1-1, lgth2, doublework, intwork, 1 );
for( i=0; i<lgth1-1; i++ )
{
currentw = wtmp;
previousw[lgth2-1] = initverticalw[i+1];
// match_calc( currentw, seq1, seq2, i, lgth2 );
- match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 );
+ match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
currentw[lgth2-1] = initverticalw[i];
#if 0
for( i=0; i<lgth1; i++ )
{
- float maxpairscore = -9999.9;
- float tmpscore;
+ double maxpairscore = -9999.9;
+ double tmpscore;
for( j=0; j<lgth2; j++ )
{
}
for( j=0; j<lgth2; j++ )
{
- float maxpairscore = -9999.9;
- float tmpscore;
+ double maxpairscore = -9999.9;
+ double tmpscore;
for( i=0; i<lgth1; i++ )
{
FreeFloatVec( m );
FreeIntVec( mp );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
#if STOREWM
FreeFloatMtx( WMMTX2 );
#endif
- return( value );
-
-// fprintf( stderr, "==== calling myself (first)\n" );
-
-#if 0
- fprintf( stderr, "seq1[0] = %s\n", seq1[0] );
- fprintf( stderr, "seq2[0] = %s\n", seq2[0] );
-#endif
- value = MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist, ist+jumpi, jst, jst+jumpj, alloclen, aseq1, aseq2, depth, gapinfo, map );
-#if 0
- fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] );
- fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] );
-#endif
-#if MEMSAVE
-#else
- for( i=0; i<icyc; i++ ) strcpy( mseq1[i], aseq1[i] );
- for( i=0; i<jcyc; i++ ) strcpy( mseq2[i], aseq2[i] );
-#endif
-
-// fprintf( stderr, "====(f) aseq1[0] (%d) = %s (%d-%d)\n", depth, aseq1[0], ist, ien );
-// fprintf( stderr, "====(f) aseq2[0] (%d) = %s (%d-%d)\n", depth, aseq2[0], jst, jen );
-
- len = strlen( mseq1[0] );
-// fprintf( stderr, "len = %d\n", len );
- l = jmid - jumpj - 1;
-// fprintf( stderr, "l=%d\n", l );
- if( l > 0 )
- {
- for( i=0; i<l; i++ ) gaps[i] = '-'; gaps[i] = 0;
- for( i=0; i<icyc; i++ )
- {
- strcat( mseq1[i], gaps );
- mseq1[i][len+l] = 0;
- }
- for( j=0; j<jcyc; j++ )
- {
- strncat( mseq2[j], seq2[j]+jst+jumpj+1, l );
- mseq2[j][len+l] = 0;
- }
-// fprintf( stderr, "penalizing (2) .. %f(%d), %f(%d)\n", ogcp2[jumpj+1], jumpj+1, fgcp2[jmid-1], jmid-1 );
- value += ( ogcp2[jumpj+1] + fgcp2[jmid-1] );
-// value += fpenalty;
- }
- len = strlen( mseq1[0] );
- l = imid - jumpi - 1;
-// fprintf( stderr, "l=%d\n", l );
- if( l > 0 )
- {
- for( i=0; i<l; i++ ) gaps[i] = '-'; gaps[i] = 0;
- for( i=0; i<icyc; i++ )
- {
- strncat( mseq1[i], seq1[i]+ist+jumpi+1, l );
- mseq1[i][len+l] = 0;
- }
- for( j=0; j<jcyc; j++ )
- {
- strcat( mseq2[j], gaps );
- mseq2[j][len+l] = 0;
- }
-
-// for( i=0; i<lgth1; i++ ) fprintf( stderr, "ogcp1[%d] = %f\n", i, ogcp1[i] );
-// for( i=0; i<lgth1; i++ ) fprintf( stderr, "fgcp1[%d] = %f\n", i, fgcp1[i] );
-
-
-// fprintf( stderr, "penalizing (1) .. ogcp1[%d] = %f, fgcp1[%d] = %f\n", jumpi+1, ogcp1[jumpi+1], imid-1, fgcp1[imid-1] );
- value += ( ogcp1[jumpi+1] + fgcp1[imid-1] );
-// value += fpenalty;
- }
-#if 0
- for( i=0; i<icyc; i++ ) fprintf( stderr, "after gapfill mseq1[%d]=%s\n", i, mseq1[i] );
- for( i=0; i<jcyc; i++ ) fprintf( stderr, "after gapfill mseq2[%d]=%s\n", i, mseq2[i] );
-#endif
-
-// fprintf( stderr, "==== calling myself (second)\n" );
-
-#if MEMSAVE
- alnlen = strlen( aseq1[0] );
- for( i=0; i<icyc; i++ ) aseq1[i] += alnlen;
- for( i=0; i<jcyc; i++ ) aseq2[i] += alnlen;
-#endif
-
-#if 0
- fprintf( stderr, "seq1[0] = %s\n", seq1[0] );
- fprintf( stderr, "seq2[0] = %s\n", seq2[0] );
-#endif
- value += MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist+imid, ien, jst+jmid, jen, alloclen, aseq1, aseq2, depth, gapinfo, map );
-#if 0
- fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] );
- fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] );
-#endif
-
-
-
-#if DEBUG
- if( value - maxwm > 1 || maxwm - value > 1 )
- {
- fprintf( stderr, "WARNING value = %f, but maxwm = %f\n", value, maxwm );
- for( i=0; i<icyc; i++ )
- {
- fprintf( stderr, ">1-%d\n%s\n", i, mseq1[i] );
- fprintf( stderr, "%s\n", aseq1[i] );
- }
- for( i=0; i<jcyc; i++ )
- {
- fprintf( stderr, ">2-%d\n%s\n", i, mseq2[i] );
- fprintf( stderr, "%s\n", aseq2[i] );
- }
-
-// exit( 1 );
- }
- else
- {
- fprintf( stderr, "value = %.0f, maxwm = %.0f -> ok\n", value, maxwm );
- }
-#endif
-
-#if MEMSAVE
-#else
- for( i=0; i<icyc; i++ ) strcat( mseq1[i], aseq1[i] );
- for( i=0; i<jcyc; i++ ) strcat( mseq2[i], aseq2[i] );
-#endif
-
-// fprintf( stderr, "====(s) aseq1[0] (%d) = %s (%d-%d)\n", depth, aseq1[0], ist, ien );
-// fprintf( stderr, "====(s) aseq2[0] (%d) = %s (%d-%d)\n", depth, aseq2[0], jst, jen );
free( gaps );
#if MEMSAVE
-float Lalignmm_hmout( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, float **map )
+double Lalignmm_hmout( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, double **map )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
int i, j;
int ll1, ll2;
int lgth1, lgth2;
- float wm = 0.0; /* int ?????? */
+ double wm = 0.0; /* int ?????? */
char **mseq1;
char **mseq2;
// char **mseq;
- float *ogcp1;
- float *ogcp2;
- float *fgcp1;
- float *fgcp2;
- float **cpmx1;
- float **cpmx2;
- float **gapinfo;
-// float fpenalty;
- float fpenalty = (float)RNApenalty;
+ double *ogcp1;
+ double *ogcp2;
+ double *fgcp1;
+ double *fgcp2;
+ double **cpmx1;
+ double **cpmx2;
+ double **gapinfo;
+// double fpenalty;
+ double fpenalty = (double)RNApenalty;
int nglen1, nglen2;
fgcp2 = AllocateFloatVec( ll2+2 );
- cpmx1 = AllocateFloatMtx( ll1+2, 27 );
- cpmx2 = AllocateFloatMtx( ll2+2, 27 );
+ cpmx1 = AllocateFloatMtx( ll1+2, nalphabets+1 );
+ cpmx2 = AllocateFloatMtx( ll2+2, nalphabets+1 );
for( i=0; i<icyc; i++ )
{
return( wm );
}
-float Lalign2m2m_hmout( char **seq1, char **seq2, char **seq1r, char **seq2r, char *dir1, char *dir2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, float **map )
+double Lalign2m2m_hmout( char **seq1, char **seq2, char **seq1r, char **seq2r, char *dir1, char *dir2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, double **map )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
int i, j;
int ll1, ll2;
int lgth1, lgth2;
- float wm = 0.0; /* int ?????? */
+ double wm = 0.0; /* int ?????? */
char **mseq1;
char **mseq2;
- float *ogcp1;
- float *ogcp2;
- float *fgcp1;
- float *fgcp2;
- float **cpmx1;
- float **cpmx2;
- float **gapinfo;
- float fpenalty = (float)penalty;
+ double *ogcp1;
+ double *ogcp2;
+ double *fgcp1;
+ double *fgcp2;
+ double **cpmx1;
+ double **cpmx2;
+ double **gapinfo;
+ double fpenalty = (double)penalty;
int nglen1, nglen2;
#if 0
fprintf( stderr, "added %c to mseq2, mseq2 = %s \n", seq2[0][j], mseq2[0] );
}
-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
+static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 )
{
char tmpc = s1[0][i1];
char *seq2 = s2[0];
*match++ = amino_dis[(int)tmpc][(int)*seq2++];
}
-static float Atracking( float *lasthorizontalw, float *lastverticalw,
+static double Atracking( double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
+ double **cpmx1, double **cpmx2,
int **ijp )
{
int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
return( 0.0 );
}
-void backdp( float **WMMTX, float wmmax, float *maxinw, float *maxinh, int lgth1, int lgth2, int alloclen, float *w1, float *w2, float *initverticalw, float *m, int *mp, int iin, int jin, char **seq1, char **seq2, char **mseq1, char **mseq2 )
+void backdp( double **WMMTX, double wmmax, double *maxinw, double *maxinh, int lgth1, int lgth2, int alloclen, double *w1, double *w2, double *initverticalw, double *m, int *mp, int iin, int jin, char **seq1, char **seq2, char **mseq1, char **mseq2 )
{
register int i, j;
int prevhiti, prevhitj;
// int lasti, lastj;
- float g;
- float fpenalty = (float)penalty;
+ double g;
+ double fpenalty = (double)penalty;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
+ double fpenalty_ex = (double)penalty_ex;
#endif
- float *currentw, *previousw, *wtmp;
- float mi;
+ double *currentw, *previousw, *wtmp;
+ double mi;
int mpi;
int *mpjpt;
- float *mjpt, *prept, *curpt;
- float wm = 0.0;
- float forwwm;
+ double *mjpt, *prept, *curpt;
+ double wm = 0.0;
+ double forwwm;
currentw = w1;
previousw = w2;
}
-float MSalign11( char **seq1, char **seq2, int alloclen )
+double MSalign11( char **seq1, char **seq2, int alloclen )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
int iin = 0, jin = 0; // by Mathog, a guess
int lgth1, lgth2;
int resultlen;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
- float fpenalty = (float)penalty;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+ double fpenalty = (double)penalty;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
+ double fpenalty_ex = (double)penalty_ex;
#endif
- float *maxinw = NULL, *maxinwpt = NULL; // by D.Mathog, guess
- float *maxinh = NULL; // by D.Mathog, guess
+ double *maxinw = NULL, *maxinwpt = NULL; // by D.Mathog, guess
+ double *maxinh = NULL; // by D.Mathog, guess
#if 1
- float wmmax;
- float *wtmp;
+ double wmmax;
+ double *wtmp;
int *ijppt;
- float *mjpt, *prept, *curpt;
+ double *mjpt, *prept, *curpt;
int *mpjpt;
#endif
- static float mi, *m;
+ static double mi, *m;
static int **ijp;
static int mpi, *mp;
- static float *w1, *w2;
- static float *match;
- static float *initverticalw; /* kufuu sureba iranai */
- static float *lastverticalw; /* kufuu sureba iranai */
+ static double *w1, *w2;
+ static double *match;
+ static double *initverticalw; /* kufuu sureba iranai */
+ static double *lastverticalw; /* kufuu sureba iranai */
static char **mseq1;
static char **mseq2;
static char **mseq;
- static float **cpmx1;
- static float **cpmx2;
+ static double **cpmx1;
+ static double **cpmx2;
static int **intwork;
- static float **WMMTX;
- static float **floatwork;
+ static double **WMMTX;
+ static double **doublework;
static int orlgth1 = 0, orlgth2 = 0;
if( orlgth1 == 0 )
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
}
mseq = AllocateCharMtx( njob, ll1+ll2 );
- cpmx1 = AllocateFloatMtx( 26, ll1+2 );
- cpmx2 = AllocateFloatMtx( 26, ll2+2 );
+ cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
+ cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
#if DEBUG
fprintf( stderr, "succeeded\n" );
static TLS int reccycle = 0;
-static void match_calc( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
+/*
+ * match_calc_add -- add the profile-vs-profile match scores of row i1 of
+ * cpmx1 against the first lgth2 positions of cpmx2 onto match[].
+ *
+ * match[j] is NOT cleared first: the new scores are accumulated on top of
+ * whatever the caller left there (this is the "add" variant).
+ *
+ *   scoringmtx   nalphabets x nalphabets score matrix.
+ *   cpmx1        read as cpmx1[i1][k], k in [0, nalphabets).
+ *   cpmx2        read as cpmx2[j][l], j in [0, lgth2), l in [0, nalphabets).
+ *   doublework,
+ *   intwork      per-position compressed copy of cpmx2: the non-zero values
+ *                and their alphabet indices, terminated by -1 in intwork.
+ *                Each intwork row therefore needs room for nalphabets+1
+ *                entries.  Rebuilt only when initialize is non-zero;
+ *                otherwise the contents of an earlier call are reused.
+ *
+ * Exits the program if the temporary score vector cannot be allocated.
+ */
+static void match_calc_add( double **scoringmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
int j, k, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+	double **cpmxpd = doublework;
int **cpmxpdn = intwork;
int count = 0;
- float *matchpt;
- float **cpmxpdpt;
+	double *matchpt;
+	double **cpmxpdpt;
int **cpmxpdnpt;
int cpkd;
+	double *scarr;
+	scarr = calloc( nalphabets, sizeof( double ) );
+	/* calloc may legitimately return NULL for a zero-sized request */
+	if( scarr == NULL && nalphabets > 0 )
+	{
+		fprintf( stderr, "Cannot allocate scarr in match_calc_add\n" );
+		exit( 1 );
+	}
+	if( initialize )
+	{
+		/* compress every position of cpmx2 into (value, index) lists */
+		for( j=0; j<lgth2; j++ )
+		{
+			count = 0;
+			for( l=0; l<nalphabets; l++ )
+			{
+				if( cpmx2[j][l] )
+				{
+					cpmxpd[j][count] = cpmx2[j][l];
+					cpmxpdn[j][count] = l;
+					count++;
+				}
+			}
+			cpmxpdn[j][count] = -1;
+		}
+	}
+
+	/* scarr[l]: score of row i1 of cpmx1 against alphabet letter l */
+	for( l=0; l<nalphabets; l++ )
+	{
+		scarr[l] = 0.0;
+		for( k=0; k<nalphabets; k++ )
+		{
+			scarr[l] += scoringmtx[k][l] * cpmx1[i1][k];
+		}
+	}
+
+	matchpt = match;
+	cpmxpdnpt = cpmxpdn;
+	cpmxpdpt = cpmxpd;
+	while( lgth2-- )
+	{
+		/* no "*matchpt = 0.0" here: the scores are added to match[] */
+		for( k=0; (cpkd=(*cpmxpdnpt)[k])>-1; k++ )
+			*matchpt += scarr[cpkd] * (*cpmxpdpt)[k];
+		matchpt++;
+		cpmxpdnpt++;
+		cpmxpdpt++;
+	}
+	free( scarr );
+}
+static void match_calc( double **n_dynamicmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
+{
+ int j, k, l;
+// double scarr[26];
+ double **cpmxpd = doublework;
+ int **cpmxpdn = intwork;
+ int count = 0;
+ double *matchpt;
+ double **cpmxpdpt;
+ int **cpmxpdnpt;
+ int cpkd;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
if( initialize )
{
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[j][l] )
{
}
}
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( k=0; k<26; k++ )
+ for( k=0; k<nalphabets; k++ )
{
- scarr[l] += n_dis[k][l] * cpmx1[i1][k];
+// scarr[l] += n_dis[k][l] * cpmx1[i1][k];
+ scarr[l] += n_dynamicmtx[k][l] * cpmx1[i1][k];
}
}
-#if 0 /* ¤³¤ì¤ò»È¤¦¤È¤\ad¤Ïfloatwork¤Î¥¢¥í¥±¡¼¥È¤òµÕ¤Ë¤¹¤ë */
+#if 0 /* if this variant is used, the doublework allocation must be reversed */
{
- float *fpt, **fptpt, *fpt2;
+ double *fpt, **fptpt, *fpt2;
int *ipt, **iptpt;
fpt2 = match;
iptpt = cpmxpdn;
cpmxpdpt++;
}
#endif
+ free( scarr );
}
-static float Atracking( float *lasthorizontalw, float *lastverticalw,
+static double Atracking( double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
int **ijp, int icyc, int jcyc,
int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, klim;
char *gaptable1, *gt1bk;
char *gaptable2, *gt2bk;
- float wm;
+ double wm;
lgth1 = ien-ist+1;
lgth2 = jen-jst+1;
return( 0.0 );
}
-static float MSalignmm_tanni( int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, float **cpmx1, float **cpmx2, int ist, int ien, int jst, int jen, int alloclen, int fulllen1, int fulllen2, char **mseq1, char **mseq2, float **gapinfo, int headgp, int tailgp )
+static double MSalignmm_tanni( double **n_dynamicmtx, int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, double **cpmx1, double **cpmx2, int ist, int ien, int jst, int jen, int alloclen, int fulllen1, int fulllen2, char **mseq1, char **mseq2, double **gapinfo, int headgp, int tailgp, double headgapfreq1_g, double headgapfreq2_g )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
register int i, j;
int ll1, ll2;
int lasti, lastj;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
+ double fpenalty_ex = (double)penalty_ex;
#endif
#if 1
- float *wtmp;
+ double *wtmp;
int *ijppt;
- float *mjpt, *prept, *curpt;
+ double *mjpt, *prept, *curpt;
int *mpjpt;
#endif
- float mi, *m;
+ double mi, *m;
int **ijp;
int mpi, *mp;
- float *w1, *w2;
- float *initverticalw; /* kufuu sureba iranai */
- float *lastverticalw; /* kufuu sureba iranai */
+ double *w1, *w2;
+ double *initverticalw; /* kufuu sureba iranai */
+ double *lastverticalw; /* kufuu sureba iranai */
int **intwork;
- float **floatwork;
+ double **doublework;
int **intmtx;
int lgth1, lgth2;
- float *ogcp1;
- float *fgcp1;
- float *ogcp2;
- float *fgcp2;
+ double *ogcp1;
+ double *fgcp1;
+ double *ogcp2;
+ double *fgcp2;
+ double *gapfreq1f;
+ double *gapfreq2f;
// char **aseq1;
// char **aseq2;
// char **aseq1bk, **aseq2bk;
+ double headgapfreq1;
+ double headgapfreq2;
ogcp1 = gapinfo[0] + ist;
fgcp1 = gapinfo[1] + ist;
ogcp2 = gapinfo[2] + jst;
fgcp2 = gapinfo[3] + jst;
+ gapfreq1f = gapinfo[4] + ist;
+ gapfreq2f = gapinfo[5] + jst;
+
+ if( ist > 0 ) headgapfreq1 = gapfreq1f[-1];
+ else headgapfreq1 = headgapfreq1_g;
+ if( jst > 0 ) headgapfreq2 = gapfreq2f[-1];
+ else headgapfreq2 = headgapfreq2_g;
#if STOREWM
char ttt1[10000], ttt2[10000];
m = AllocateFloatVec( ll2+2 );
mp = AllocateIntVec( ll2+2 );
- floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, 27 );
- intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, 27 );
+ doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets+1 );
+ intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 );
intmtx = AllocateIntMtx( ll1+1, ll2+1 );
currentw = w1;
previousw = w2;
- match_calc( initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, floatwork, intwork, 1 );
+ match_calc( n_dynamicmtx, initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, doublework, intwork, 1 );
- match_calc( currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, floatwork, intwork, 1 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, doublework, intwork, 1 );
if( headgp || ist != 0 )
{
for( i=1; i<lgth1+1; i++ )
{
- initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+ initverticalw[i] += ( ogcp1[0] * headgapfreq2 + fgcp1[i-1] * gapfreq2f[0] ) ;
}
}
if( headgp || jst != 0 )
{
for( j=1; j<lgth2+1; j++ )
{
- currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+ currentw[j] += ( ogcp2[0] * headgapfreq1 + fgcp2[j-1] * gapfreq1f[0] ) ;
}
}
for( j=1; j<lgth2+1; ++j )
{
- m[j] = currentw[j-1] + ogcp1[1]; mp[j] = 0;;
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2f[j-1]; mp[j] = 0;;
}
lastverticalw[0] = currentw[lgth2-1];
previousw[0] = initverticalw[i-1];
- match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
currentw[0] = initverticalw[i];
- mi = previousw[0] + ogcp2[1];
+ mi = previousw[0] + ogcp2[1] * gapfreq1f[i-1];
mpi = 0;
ijppt = ijp[i] + 1;
#if 0
fprintf( stderr, "%5.0f->", wm );
#endif
- g = mi + fgcp2[j-1];
+ g = mi + fgcp2[j-1] * gapfreq1f[i];
#if 0
fprintf( stderr, "%5.0f?", g );
#endif
wm = g;
*ijppt = -( j - mpi );
}
- g = *prept + ogcp2[j];
+ g = *prept + ogcp2[j] * gapfreq1f[i-1];
if( g >= mi )
{
mi = g;
mi += fpenalty_ex;
#endif
- g = *mjpt + fgcp1[i-1];
+ g = *mjpt + fgcp1[i-1] * gapfreq2f[j];
#if 0
fprintf( stderr, "%5.0f?", g );
#endif
wm = g;
*ijppt = +( i - *mpjpt );
}
- g = *prept + ogcp1[i];
+ g = *prept + ogcp1[i] * gapfreq2f[j-1];
if( g >= *mjpt )
{
*mjpt = g;
FreeIntVec( mp );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
FreeIntMtx( intmtx );
}
static void freearrays_rec1(
- float *w1, float *w2, float *initverticalw, float *lastverticalw,
- float *midw, float *midm, float *midn,
+ double *w1, double *w2, double *initverticalw, double *lastverticalw,
+ double *midw, double *midm, double *midn,
int *jumpbacki, int *jumpbackj, int *jumpforwi, int *jumpforwj, int *jumpdummi, int *jumpdummj,
- float *m, int *mp,
- float **floatwork, int **intwork
+ double *m, int *mp,
+ double **doublework, int **intwork
#if STOREWM
- , float **WMMTX, float **WMMTX2
+ , double **WMMTX, double **WMMTX2
#endif
)
{
FreeFloatVec( m );
FreeIntVec( mp );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
#if STOREWM
#endif
}
-static float MSalignmm_rec( int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, float **cpmx1, float **cpmx2, int ist, int ien, int jst, int jen, int alloclen, int fulllen1, int fulllen2, char **mseq1, char **mseq2, int depth, float **gapinfo, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
+static double MSalignmm_rec( double **n_dynamicmtx, int icyc, int jcyc, double *eff1, double *eff2, char **seq1, char **seq2, double **cpmx1, double **cpmx2, int ist, int ien, int jst, int jen, int alloclen, int fulllen1, int fulllen2, char **mseq1, char **mseq2, int depth, double **gapinfo, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp, double headgapfreq1_g, double headgapfreq2_g )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
int alnlen;
- float value = 0.0;
+ double value = 0.0;
register int i, j;
char **aseq1, **aseq2;
int ll1, ll2, l, len;
int lasti, lastj, imid;
int jmid = 0; // by D.Mathog, a guess
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
+ double fpenalty_ex = (double)penalty_ex;
#endif
- float *wtmp;
+ double *wtmp;
// short *ijppt;
int *mpjpt;
// short **ijp;
int *mp;
int mpi;
- float *mjpt, *prept, *curpt;
- float mi;
- float *m;
- float *w1, *w2;
-// float *match;
- float *initverticalw; /* kufuu sureba iranai */
- float *lastverticalw; /* kufuu sureba iranai */
+ double *mjpt, *prept, *curpt;
+ double mi;
+ double *m;
+ double *w1, *w2;
+// double *match;
+ double *initverticalw; /* kufuu sureba iranai */
+ double *lastverticalw; /* kufuu sureba iranai */
int **intwork;
- float **floatwork;
+ double **doublework;
// short **shortmtx;
#if STOREWM
- float **WMMTX;
- float **WMMTX2;
+ double **WMMTX;
+ double **WMMTX2;
#endif
- float *midw;
- float *midm;
- float *midn;
+ double *midw;
+ double *midm;
+ double *midn;
int lgth1, lgth2;
- float maxwm;
+ double maxwm;
int *jumpforwi;
int *jumpforwj;
int *jumpbacki;
int jumpi, jumpj = 0; // by D.Mathog, a guess
char *gaps;
int ijpi, ijpj;
- float *ogcp1;
- float *fgcp1;
- float *ogcp2;
- float *fgcp2;
- float firstm;
+ double *ogcp1;
+ double *fgcp1;
+ double *ogcp2;
+ double *fgcp2;
+ double firstm;
int firstmp;
#if STOREWM
static TLS char ttt1[50000];
static TLS char ttt2[50000];
#endif
+ double *gapfreq1f;
+ double *gapfreq2f;
+ double headgapfreq1;
+ double headgapfreq2;
#if 0
int nglen1, nglen2;
fgcp1 = gapinfo[1] + ist;
ogcp2 = gapinfo[2] + jst;
fgcp2 = gapinfo[3] + jst;
+ gapfreq1f = gapinfo[4] + ist;
+ gapfreq2f = gapinfo[5] + jst;
+
+ if( ist > 0 ) headgapfreq1 = gapfreq1f[-1];
+ else headgapfreq1 = headgapfreq1_g;
+ if( jst > 0 ) headgapfreq2 = gapfreq2f[-1];
+ else headgapfreq2 = headgapfreq2_g;
depth++;
reccycle++;
{
mseq2[i][0] = 0;
for( j=0; j<lgth1; j++ )
- strcat( mseq2[i], "-" );
+// strcat( mseq2[i], "-" );
+ strcat( mseq2[i], newgapstr );
}
// fprintf( stderr, "==== mseq1[0] (%d) = %s\n", depth, mseq1[0] );
{
// fprintf( stderr, "==== Going to _tanni\n" );
- value = MSalignmm_tanni( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist, ien, jst, jen, alloclen, fulllen1, fulllen2, aseq1, aseq2, gapinfo, headgp, tailgp );
+ value = MSalignmm_tanni( n_dynamicmtx, icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist, ien, jst, jen, alloclen, fulllen1, fulllen2, aseq1, aseq2, gapinfo, headgp, tailgp, headgapfreq1_g, headgapfreq2_g );
#if MEMSAVE
mp = AllocateIntVec( ll2+2 );
gaps = AllocateCharVec( MAX( ll1, ll2 ) + 2 );
- floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, 26 );
- intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, 26 );
+ doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets );
#if DEBUG
fprintf( stderr, "succeeded\n" );
currentw = w1;
previousw = w2;
- match_calc( initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, floatwork, intwork, 1 );
+ match_calc( n_dynamicmtx, initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, doublework, intwork, 1 );
- match_calc( currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, floatwork, intwork, 1 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, doublework, intwork, 1 );
for( i=1; i<lgth1+1; i++ )
{
- initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+// initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+ initverticalw[i] += ( ogcp1[0] * headgapfreq2 + fgcp1[i-1] * gapfreq2f[0] ) ;
}
for( j=1; j<lgth2+1; j++ )
{
- currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+ currentw[j] += ( ogcp2[0] * headgapfreq1 + fgcp2[j-1] * gapfreq1f[0]) ;
}
#if STOREWM
for( j=1; j<lgth2+1; ++j )
{
- m[j] = currentw[j-1] + ogcp1[1];
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2f[j-1];
// m[j] = currentw[j-1];
mp[j] = 0;
}
w1, w2, initverticalw, lastverticalw, midw, midm, midn,
jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj,
m, mp,
- floatwork, intwork
+ doublework, intwork
#if STOREWM
, WMMTX, WMMTX2
#endif
previousw[0] = initverticalw[i-1];
- match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
currentw[0] = initverticalw[i];
m[0] = ogcp1[i];
#endif
if( i == imid ) midm[0] = m[0];
- mi = previousw[0] + ogcp2[1];
+ mi = previousw[0] + ogcp2[1] * gapfreq1f[i-1];
// mi = previousw[0];
mpi = 0;
#if 0
fprintf( stderr, "%5.0f->", wm );
#endif
- g = mi + fgcp2[j-1];
+ g = mi + fgcp2[j-1] * gapfreq1f[i];
// g = mi + fpenalty;
#if 0
fprintf( stderr, "%5.0f?", g );
wm = g;
// *ijppt = -( j - mpi );
}
- g = *prept + ogcp2[j];
+ g = *prept + ogcp2[j] * gapfreq1f[i-1];
// g = *prept;
if( g >= mi )
{
mi += fpenalty_ex;
#endif
- g = *mjpt + fgcp1[i-1];
+ g = *mjpt + fgcp1[i-1] * gapfreq2f[j];
// g = *mjpt + fpenalty;
#if 0
fprintf( stderr, "%5.0f?", g );
}
- g = *prept + ogcp1[i];
+ g = *prept + ogcp1[i] * gapfreq2f[j-1];
// g = *prept;
if( g >= *mjpt )
{
// gyakudp
- match_calc( initverticalw, cpmx2+jst, cpmx1+ist, lgth2-1, lgth1, floatwork, intwork, 1 );
- match_calc( currentw, cpmx1+ist, cpmx2+jst, lgth1-1, lgth2, floatwork, intwork, 1 );
+ match_calc( n_dynamicmtx, initverticalw, cpmx2+jst, cpmx1+ist, lgth2-1, lgth1, doublework, intwork, 1 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, lgth1-1, lgth2, doublework, intwork, 1 );
for( i=0; i<lgth1-1; i++ )
{
- initverticalw[i] += ( fgcp1[lgth1-1] + ogcp1[i+1] );
-// initverticalw[i] += fpenalty;
+// initverticalw[i] += ( fgcp1[lgth1-1] + ogcp1[i+1] );
+ initverticalw[i] += ( fgcp1[lgth1-1] * gapfreq2f[lgth2] + ogcp1[i+1] * gapfreq2f[lgth2-1] );
}
for( j=0; j<lgth2-1; j++ )
{
- currentw[j] += ( fgcp2[lgth2-1] + ogcp2[j+1] );
-// currentw[j] += fpenalty;
+// currentw[j] += ( fgcp2[lgth2-1] + ogcp2[j+1] );
+ currentw[j] += ( fgcp2[lgth2-1] * gapfreq1f[lgth1] + ogcp2[j+1] * gapfreq1f[lgth1-1] );
}
#if STOREWM
+#if 0
for( j=lgth2-1; j>0; --j )
{
m[j-1] = currentw[j] + fgcp2[lgth2-2];
// m[j-1] = currentw[j];
mp[j] = lgth1-1;
}
+#else
+ for( j=lgth2-1; j>-1; --j )
+ {
+ m[j] = currentw[j+1] + fgcp1[lgth1-2] * gapfreq2f[j+1];
+// m[j-1] = currentw[j];
+ mp[j] = lgth1-1;
+ }
+#endif
// for( j=0; j<lgth2; j++ ) m[j] = 0.0;
// m[lgth2-1] ha irunoka?
w1, w2, initverticalw, lastverticalw, midw, midm, midn,
jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj,
m, mp,
- floatwork, intwork
+ doublework, intwork
#if STOREWM
, WMMTX, WMMTX2
#endif
currentw = wtmp;
previousw[lgth2-1] = initverticalw[i+1];
// match_calc( currentw, seq1, seq2, i, lgth2 );
- match_calc( currentw, cpmx1+ist, cpmx2+jst, i, lgth2, floatwork, intwork, 0 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
currentw[lgth2-1] = initverticalw[i];
// WMMTX2[i][lgth2] += m[lgth2];
// fprintf( stderr, "m[] = %f\n", m[lgth2] );
- mi = previousw[lgth2-1] + fgcp2[lgth2-2];
+ mi = previousw[lgth2-1] + fgcp2[lgth2-2] * gapfreq1f[i+1];
// mi = previousw[lgth2-1];
mpi = lgth2 - 1;
ijpi = i+1;
ijpj = j+1;
- g = mi + ogcp2[j+1];
+ g = mi + ogcp2[j+1] * gapfreq1f[i];
// g = mi + fpenalty;
if( g > wm )
{
ijpi = i+1;
}
- g = *prept + fgcp2[j];
+ g = *prept + fgcp2[j] * gapfreq1f[i+1];
// g = *prept;
if( g >= mi )
{
#endif
// fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt );
- g = *mjpt + ogcp1[i+1];
+ g = *mjpt + ogcp1[i+1] * gapfreq2f[j];
// g = *mjpt + fpenalty;
if( g > wm )
{
}
// if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j );
- g = *prept + fgcp1[i];
+ g = *prept + fgcp1[i] * gapfreq2f[j+1];
// g = *prept;
if( g >= *mjpt )
{
w1, w2, initverticalw, lastverticalw, midw, midm, midn,
jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj,
m, mp,
- floatwork, intwork
+ doublework, intwork
#if STOREWM
, WMMTX, WMMTX2
#endif
// fprintf( stderr, "==== calling myself (first)\n" );
- value = MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist, ist+jumpi, jst, jst+jumpj, alloclen, fulllen1, fulllen2, aseq1, aseq2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp ); // chudan mada
+ value = MSalignmm_rec( n_dynamicmtx, icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist, ist+jumpi, jst, jst+jumpj, alloclen, fulllen1, fulllen2, aseq1, aseq2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp, headgapfreq1_g, headgapfreq2_g ); // chudan mada
#if 0
fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] );
fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] );
// fprintf( stderr, "l=%d\n", l );
if( l > 0 )
{
- for( i=0; i<l; i++ ) gaps[i] = '-'; gaps[i] = 0;
+// for( i=0; i<l; i++ ) gaps[i] = '-'; gaps[i] = 0;
+ for( i=0; i<l; i++ ) gaps[i] = *newgapstr; gaps[i] = 0;
for( i=0; i<icyc; i++ )
{
strcat( mseq1[i], gaps );
// fprintf( stderr, "l=%d\n", l );
if( l > 0 )
{
- for( i=0; i<l; i++ ) gaps[i] = '-'; gaps[i] = 0;
+// for( i=0; i<l; i++ ) gaps[i] = '-'; gaps[i] = 0;
+ for( i=0; i<l; i++ ) gaps[i] = *newgapstr; gaps[i] = 0;
for( i=0; i<icyc; i++ )
{
strncat( mseq1[i], seq1[i]+ist+jumpi+1, l );
for( i=0; i<jcyc; i++ ) aseq2[i] += alnlen;
#endif
- value += MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist+imid, ien, jst+jmid, jen, alloclen, fulllen1, fulllen2, aseq1, aseq2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp ); // chudan mada
+ value += MSalignmm_rec( n_dynamicmtx, icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, ist+imid, ien, jst+jmid, jen, alloclen, fulllen1, fulllen2, aseq1, aseq2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp, headgapfreq1_g, headgapfreq2_g ); // chudan mada
#if 0
fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] );
fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] );
}
static void freearrays(
- float *ogcp1,
- float *ogcp2,
- float *fgcp1,
- float *fgcp2,
- float **cpmx1,
- float **cpmx2,
- float **gapinfo,
+ double *ogcp1,
+ double *ogcp2,
+ double *fgcp1,
+ double *fgcp2,
+ double **cpmx1,
+ double **cpmx2,
+ double *gapfreq1f,
+ double *gapfreq2f,
+ double **gapinfo,
char **mseq1,
char **mseq2
)
FreeFloatVec( fgcp2 );
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
+ FreeFloatVec( gapfreq1f );
+ FreeFloatVec( gapfreq2f );
free( (void *)gapinfo );
FreeCharMtx( mseq1 );
FreeCharMtx( mseq2 );
}
-float MSalignmm( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
+double MSalignmm( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
int i, j;
int ll1, ll2;
int lgth1, lgth2;
- float wm = 0.0; /* int ?????? */
+ double wm = 0.0; /* int ?????? */
char **mseq1;
char **mseq2;
- float *ogcp1;
- float *ogcp2;
- float *fgcp1;
- float *fgcp2;
- float **cpmx1;
- float **cpmx2;
- float **gapinfo;
- float fpenalty = (float)penalty;
+ double *ogcp1;
+ double *ogcp2;
+ double *fgcp1;
+ double *fgcp2;
+ double **cpmx1;
+ double **cpmx2;
+ double **gapinfo;
+ double fpenalty = (double)penalty;
+ double *gapfreq1f;
+ double *gapfreq2f;
int nglen1, nglen2;
+ double headgapfreq1;
+ double headgapfreq2;
#if 0
fprintf( stderr, "eff in SA+++align\n" );
mseq1 = AllocateCharMtx( icyc, ll1+ll2 );
mseq2 = AllocateCharMtx( jcyc, ll1+ll2 );
- gapinfo = AllocateFloatMtx( 4, 0 );
+ gapinfo = AllocateFloatMtx( 6, 0 );
ogcp1 = AllocateFloatVec( ll1+2 );
ogcp2 = AllocateFloatVec( ll2+2 );
fgcp1 = AllocateFloatVec( ll1+2 );
fgcp2 = AllocateFloatVec( ll2+2 );
- cpmx1 = AllocateFloatMtx( ll1+2, 27 );
- cpmx2 = AllocateFloatMtx( ll2+2, 27 );
+ cpmx1 = AllocateFloatMtx( ll1+2, nalphabets+1 );
+ cpmx2 = AllocateFloatMtx( ll2+2, nalphabets+1 );
+
+ gapfreq1f = AllocateFloatVec( ll1+2 ); // must be filled with 0.0
+ gapfreq2f = AllocateFloatVec( ll2+2 ); // must be filled with 0.0
for( i=0; i<icyc; i++ )
{
new_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 );
new_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap2 );
new_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 );
+ outgapcount( &headgapfreq1, icyc, sgap1, eff1 );
+ outgapcount( &headgapfreq2, jcyc, sgap2, eff2 );
+ outgapcount( gapfreq1f+lgth1, icyc, egap1, eff1 );
+ outgapcount( gapfreq2f+lgth2, jcyc, egap2, eff2 );
}
else
{
st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 );
st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 );
st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 );
+ headgapfreq1 = 0.0;
+ headgapfreq2 = 0.0;
+ gapfreq1f[lgth1] = 0.0;
+ gapfreq2f[lgth2] = 0.0;
+ }
+
+ if( legacygapcost == 0 )
+ {
+ gapcountf( gapfreq1f, seq1, icyc, eff1, lgth1 );
+ gapcountf( gapfreq2f, seq2, jcyc, eff2, lgth2 );
+ for( i=0; i<lgth1+1; i++ ) gapfreq1f[i] = 1.0 - gapfreq1f[i];
+ for( i=0; i<lgth2+1; i++ ) gapfreq2f[i] = 1.0 - gapfreq2f[i];
+ headgapfreq1 = 1.0 - headgapfreq1;
+ headgapfreq2 = 1.0 - headgapfreq2;
+ }
+ else
+ {
+ for( i=0; i<lgth1+1; i++ ) gapfreq1f[i] = 1.0;
+ for( i=0; i<lgth2+1; i++ ) gapfreq2f[i] = 1.0;
+ headgapfreq1 = 1.0;
+ headgapfreq2 = 1.0;
}
#if 1
for( i=0; i<lgth1; i++ )
{
- ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty;
- fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty;
+ ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty * ( gapfreq1f[i] );
+ fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty * ( gapfreq1f[i] );
// fprintf( stderr, "fgcp1[%d] = %f\n", i, fgcp1[i] );
}
for( i=0; i<lgth2; i++ )
{
- ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty;
- fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty;
+ ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty * ( gapfreq2f[i] );
+ fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty * ( gapfreq2f[i] );
// fprintf( stderr, "fgcp2[%d] = %f\n", i, fgcp2[i] );
}
#else
gapinfo[1] = fgcp1;
gapinfo[2] = ogcp2;
gapinfo[3] = fgcp2;
+ gapinfo[4] = gapfreq1f;
+ gapinfo[5] = gapfreq2f;
#endif
#if 0
fflush( stdout );
#endif
- wm = MSalignmm_rec( icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, 0, lgth1-1, 0, lgth2-1, alloclen, lgth1, lgth2, mseq1, mseq2, 0, gapinfo, chudanpt, chudanref, chudanres, headgp, tailgp );
+ wm = MSalignmm_rec( n_dynamicmtx, icyc, jcyc, eff1, eff2, seq1, seq2, cpmx1, cpmx2, 0, lgth1-1, 0, lgth2-1, alloclen, lgth1, lgth2, mseq1, mseq2, 0, gapinfo, chudanpt, chudanref, chudanres, headgp, tailgp, headgapfreq1, headgapfreq2 );
#ifdef enablemultithread
if( chudanres && *chudanres )
{
// fprintf( stderr, "\n\n## CHUUDAN!!! relay\n" );
*chudanres = 1;
- freearrays( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1, cpmx2, gapinfo, mseq1, mseq2 );
+ freearrays( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1, cpmx2, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2 );
return( -1.0 );
}
#endif
-#if DEBUG
+#if 0
+ fprintf( stderr, "\n" );
fprintf( stderr, " seq1[0] = %s\n", seq1[0] );
fprintf( stderr, " seq2[0] = %s\n", seq2[0] );
fprintf( stderr, "mseq1[0] = %s\n", mseq1[0] );
fprintf( stderr, "mseq2[0] = %s\n", mseq2[0] );
+ fprintf( stderr, "\n" );
#endif
// fprintf( stderr, "wm = %f\n", wm );
}
- freearrays( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1, cpmx2, gapinfo, mseq1, mseq2 );
+ freearrays( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1, cpmx2, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2 );
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
}
}
+#if 0
+ fprintf( stderr, "\n" );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, " seq1[i] = %s\n", seq1[i] );
+ for( j=0; j<jcyc; j++ ) fprintf( stderr, " seq2[j] = %s\n", seq2[j] );
+ fprintf( stderr, "\n" );
+#endif
+
+ return( wm );
+}
+
+// -------------------------------
+// variousdist
+// -------------------------------
+
+static void fillzero( double *s, int l ) /* Set the first l elements of s to 0.0.  NOTE(review): l must be >= 0; with a negative l the countdown below never reaches zero inside the buffer. */
+{
+ while( l-- ) *s++ = 0.0;
+}
+
+static double MSalignmm_tanni_variousdist( double ***matrices, int icyc, int jcyc, char **seq1, char **seq2, double ***cpmx1s, double ***cpmx2s, int ist, int ien, int jst, int jen, int alloclen, int fulllen1, int fulllen2, char **mseq1, char **mseq2, double **gapinfo, int headgp, int tailgp, double headgapfreq1_g, double headgapfreq2_g )
+/* Full-matrix DP alignment of the segment [ist,ien] of group 1 (seq1)
+ * against the segment [jst,jen] of group 2 (seq2), using several scoring
+ * matrices at once.
+ *
+ * The match score of every cell is the sum, over the maxdistclass classes c,
+ * of the profile score that match_calc_add() computes from matrices[c],
+ * cpmx1s[c] and cpmx2s[c].
+ *
+ * gapinfo[0..5] are read as ogcp1, fgcp1, ogcp2, fgcp2, gapfreq1f and
+ * gapfreq2f (presumably opening-gap and final-gap penalties plus per-column
+ * gap-frequency weights -- confirm against the caller); each pointer is
+ * offset by ist or jst.  headgapfreq1_g / headgapfreq2_g are used only when
+ * the segment starts at position 0; otherwise the entry just before the
+ * segment (index -1) is used instead.
+ *
+ * The traceback matrix, allocated with AllocateIntMtx( ll1+1, ll2+1 ), is
+ * filled here and handed to Atracking() together with mseq1 / mseq2.
+ * alloclen is not used in this function.
+ *
+ * Returns wm as left by the last DP cell processed; the value returned by
+ * Atracking() is discarded.
+ *
+ * Known issue (original note): pre-existing gaps are not handled properly
+ * when the score is computed.
+ */
+{
+// int k;
+ register int i, j, c;
+ int ll1, ll2;
+ int lasti, lastj;
+ double wm = 0.0; /* best score entering the current cell (original note: "int ??????") */
+ double g;
+ double *currentw, *previousw; /* DP row being filled / row just completed; swapped for every i */
+#if USE_PENALTY_EX
+ double fpenalty_ex = (double)penalty_ex;
+#endif
+#if 1
+ double *wtmp;
+ int *ijppt;
+ double *mjpt, *prept, *curpt;
+ int *mpjpt;
+#endif
+ double mi, *m; /* best gap-start score so far along the current row (mi) and per column (m[j]) */
+ int **ijp;
+ int mpi, *mp; /* positions at which mi / m[j] were attained */
+ double *w1, *w2;
+ double *initverticalw; /* first DP column; could be eliminated with some ingenuity */
+ double *lastverticalw; /* last DP column; could be eliminated with some ingenuity */
+ int ***intwork; /* per-class sparse index work area for match_calc_add() */
+ double ***doublework; /* per-class sparse value work area for match_calc_add() */
+ int **intmtx;
+ int lgth1, lgth2;
+ double *ogcp1;
+ double *fgcp1;
+ double *ogcp2;
+ double *fgcp2;
+ double *gapfreq1f;
+ double *gapfreq2f;
+// char **aseq1;
+// char **aseq2;
+// char **aseq1bk, **aseq2bk;
+ double headgapfreq1;
+ double headgapfreq2;
+
+
+ ogcp1 = gapinfo[0] + ist;
+ fgcp1 = gapinfo[1] + ist;
+ ogcp2 = gapinfo[2] + jst;
+ fgcp2 = gapinfo[3] + jst;
+ gapfreq1f = gapinfo[4] + ist;
+ gapfreq2f = gapinfo[5] + jst;
+
+ if( ist > 0 ) headgapfreq1 = gapfreq1f[-1]; /* entry just before this segment */
+ else headgapfreq1 = headgapfreq1_g;
+ if( jst > 0 ) headgapfreq2 = gapfreq2f[-1]; /* entry just before this segment */
+ else headgapfreq2 = headgapfreq2_g;
+
+#if STOREWM
+ char ttt1[10000], ttt2[10000]; /* debug copies of the segments; NOTE(review): the strncpy calls below do not bound lgth1 / lgth2 to this size */
+#endif
+
+
+ lgth1 = ien-ist+1;
+ lgth2 = jen-jst+1;
+
+#if STOREWM
+ strncpy( ttt1, seq1[0]+ist, lgth1 ); ttt1[lgth1] = 0;
+ strncpy( ttt2, seq2[0]+jst, lgth2 ); ttt2[lgth2] = 0;
+
+ fprintf( stderr, "in _tanni ist,ien = %d,%d, lgth1=%d\n", ist, ien, lgth1 );
+ fprintf( stderr, "in _tanni jst,jen = %d,%d, lgth2=%d\n", jst, jen, lgth2 );
+ fprintf( stderr, "ttt1 = %s\n", ttt1 );
+ fprintf( stderr, "ttt2 = %s\n", ttt2 );
+#endif
+
+#if 0
+ fprintf( stderr, "in _tanni ist,ien = %d,%d, fulllen1=%d\n", ist, ien, fulllen1 );
+ fprintf( stderr, "in _tanni jst,jen = %d,%d, fulllen2=%d\n", jst, jen, fulllen2 );
+ fprintf( stderr, "in _tanni seq1[0] = %-*.*s\n", ien-ist+1, ien-ist+1, seq1[0]+ist );
+ fprintf( stderr, "in _tanni seq2[0] = %-*.*s\n", jen-jst+1, jen-jst+1, seq2[0]+jst );
+#endif
+
+
+ ll1 = ( (int)(lgth1) ) + 100; /* segment length plus 100 spare cells */
+ ll2 = ( (int)(lgth2) ) + 100; /* segment length plus 100 spare cells */
+
+// aseq1 = AllocateCharMtx( icyc, 0 );
+// aseq2 = AllocateCharMtx( jcyc, 0 );
+// aseq1bk = AllocateCharMtx( icyc, lgth1+lgth2+100 );
+// aseq2bk = AllocateCharMtx( jcyc, lgth1+lgth2+100 );
+// for( i=0; i<icyc; i++ ) aseq1[i] = aseq1bk[i];
+// for( i=0; i<jcyc; i++ ) aseq2[i] = aseq2bk[i];
+
+ w1 = AllocateFloatVec( ll2+2 );
+ w2 = AllocateFloatVec( ll2+2 );
+
+ initverticalw = AllocateFloatVec( ll1+2 );
+ lastverticalw = AllocateFloatVec( ll1+2 );
+
+ m = AllocateFloatVec( ll2+2 );
+ mp = AllocateIntVec( ll2+2 );
+
+ doublework = AllocateFloatCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets+1 ); /* one work area per class, indexed doublework[c] below */
+ intwork = AllocateIntCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets+1 ); /* nalphabets+1 leaves room for the -1 terminator written by match_calc_add() */
+
+
+ intmtx = AllocateIntMtx( ll1+1, ll2+1 );
+
+ ijp = intmtx; /* traceback codes, one per DP cell */
+
+ currentw = w1;
+ previousw = w2;
+
+#if 0 /* single-matrix version, disabled; it refers to names that are not declared in this function */
+ match_calc( n_dynamicmtx, initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, doublework, intwork, 1 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, doublework, intwork, 1 );
+#else
+ fillzero( initverticalw, lgth1 ); /* match_calc_add() accumulates, so start from zero */
+ for( c=0; c<maxdistclass; c++ ) /* profiles passed in swapped order: first column of group 2 against every column of group 1 */
+ match_calc_add( matrices[c], initverticalw, cpmx2s[c]+jst, cpmx1s[c]+ist, 0, lgth1, doublework[c], intwork[c], 1 );
+
+ fillzero( currentw, lgth2 ); /* match_calc_add() accumulates, so start from zero */
+ for( c=0; c<maxdistclass; c++ ) /* first column of group 1 against every column of group 2; also rebuilds the sparse copy of cpmx2s[c] */
+ match_calc_add( matrices[c], currentw, cpmx1s[c]+ist, cpmx2s[c]+jst, 0, lgth2, doublework[c], intwork[c], 1 );
+#endif
+
+ if( headgp || ist != 0 ) /* skipped only when headgp is 0 and the segment starts at position 0 */
+ {
+ for( i=1; i<lgth1+1; i++ )
+ {
+ initverticalw[i] += ( ogcp1[0] * headgapfreq2 + fgcp1[i-1] * gapfreq2f[0] ) ;
+ }
+ }
+ if( headgp || jst != 0 ) /* skipped only when headgp is 0 and the segment starts at position 0 */
+ {
+ for( j=1; j<lgth2+1; j++ )
+ {
+ currentw[j] += ( ogcp2[0] * headgapfreq1 + fgcp2[j-1] * gapfreq1f[0] ) ;
+ }
+ }
+
+ for( j=1; j<lgth2+1; ++j )
+ {
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2f[j-1]; mp[j] = 0;;
+ }
+
+ lastverticalw[0] = currentw[lgth2-1];
+
+
+
+ if( tailgp || jen != fulllen2-1 ) lasti = lgth1+1; else lasti = lgth1; /* one extra row unless tailgp is 0 and group 2's segment ends at fulllen2-1 */
+// if( 1 ) lasti = lgth1+1; else lasti = lgth1;
+ for( i=1; i<lasti; i++ )
+ {
+ wtmp = previousw; /* swap the two row buffers */
+ previousw = currentw;
+ currentw = wtmp;
+
+ previousw[0] = initverticalw[i-1];
+
+#if 0 /* single-matrix version, disabled */
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
+#else
+ fillzero( currentw, lgth2 ); /* clear the row before the per-class scores are summed into it */
+ for( c=0; c<maxdistclass; c++ )
+ match_calc_add( matrices[c], currentw, cpmx1s[c]+ist, cpmx2s[c]+jst, i, lgth2, doublework[c], intwork[c], 0 );
+#endif
+ currentw[0] = initverticalw[i];
+
+ mi = previousw[0] + ogcp2[1] * gapfreq1f[i-1];
+ mpi = 0;
+
+ ijppt = ijp[i] + 1;
+ mjpt = m + 1;
+ prept = previousw;
+ curpt = currentw + 1;
+ mpjpt = mp + 1;
+// if( tailgp && jen != fulllen2-1 ) lastj = lgth2+1; else lastj = lgth2;
+ lastj = lgth2+1;
+ for( j=1; j<lastj; j++ )
+ {
+ wm = *prept; /* diagonal move: value of previousw[j-1] */
+ *ijppt = 0; /* traceback code 0 = diagonal */
+
+#if 0
+ fprintf( stderr, "%5.0f->", wm );
+#endif
+ g = mi + fgcp2[j-1] * gapfreq1f[i]; /* arrive through a gap that started at column mpi of the previous row */
+#if 0
+ fprintf( stderr, "%5.0f?", g );
+#endif
+ if( g > wm )
+ {
+ wm = g;
+ *ijppt = -( j - mpi ); /* negative code: jump back (j - mpi) columns */
+ }
+ g = *prept + ogcp2[j] * gapfreq1f[i-1]; /* candidate for a new gap start at previousw[j-1] */
+ if( g >= mi )
+ {
+ mi = g;
+ mpi = j-1;
+ }
+#if USE_PENALTY_EX
+ mi += fpenalty_ex;
+#endif
+
+ g = *mjpt + fgcp1[i-1] * gapfreq2f[j]; /* arrive through a gap that started at row mp[j] of this column */
+#if 0
+ fprintf( stderr, "%5.0f?", g );
+#endif
+ if( g > wm )
+ {
+ wm = g;
+ *ijppt = +( i - *mpjpt ); /* positive code: jump back (i - mp[j]) rows */
+ }
+ g = *prept + ogcp1[i] * gapfreq2f[j-1]; /* candidate for a new gap start at previousw[j-1] */
+ if( g >= *mjpt )
+ {
+ *mjpt = g;
+ *mpjpt = i-1;
+ }
+#if USE_PENALTY_EX
+ m[j] += fpenalty_ex;
+#endif
+
+#if 0
+ fprintf( stderr, "%5.0f ", wm );
+#endif
+ *curpt += wm; /* add the best predecessor to the match score already stored in currentw[j] */
+
+
+ ijppt++;
+ mjpt++;
+ prept++;
+ mpjpt++;
+ curpt++;
+ }
+ lastverticalw[i] = currentw[lgth2-1];
+ }
+
+// fprintf( stderr, "wm = %f\n", wm );
+
+ Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, ist, ien, jst, jen, fulllen1, fulllen2, tailgp ); /* traceback over ijp; its return value is ignored */
+#if 0
+ fprintf( stderr, "res in _tanni mseq1[0] = %s\n", mseq1[0] );
+ fprintf( stderr, "res in _tanni mseq2[0] = %s\n", mseq2[0] );
+#endif
+
+// for( i=0; i<icyc; i++ ) strcpy( mseq1[i], aseq1[i] );
+// for( i=0; i<jcyc; i++ ) strcpy( mseq2[i], aseq2[i] );
+
+// fprintf( stderr, "in _tanni, aseq1 = %s\n", aseq1[0] );
+// fprintf( stderr, "in _tanni, mseq1 = %s\n", mseq1[0] );
+
+ FreeFloatVec( w1 );
+ FreeFloatVec( w2 );
+ FreeFloatVec( initverticalw );
+ FreeFloatVec( lastverticalw );
+
+ FreeFloatVec( m );
+ FreeIntVec( mp );
+
+
+ FreeFloatCub( doublework );
+ FreeIntCub( intwork );
+
+ FreeIntMtx( intmtx );
+
+
+// FreeCharMtx( aseq1bk );
+// FreeCharMtx( aseq2bk );
+
+// free( aseq1 );
+// free( aseq2 );
+
+ return( wm );
+
+}
+
+static void freearrays_rec1_variousdist(
+ double *w1, double *w2, double *initverticalw, double *lastverticalw,
+ double *midw, double *midm, double *midn,
+ int *jumpbacki, int *jumpbackj, int *jumpforwi, int *jumpforwj, int *jumpdummi, int *jumpdummj,
+ double *m, int *mp,
+ double ***doublework, int ***intwork
+#if STOREWM
+ , double **WMMTX, double **WMMTX2
+#endif
+)
+{ /* Release every work buffer passed in: the double and int vectors through
+   * FreeFloatVec() / FreeIntVec(), the three-level work areas through
+   * FreeFloatCub() / FreeIntCub(), and, only when STOREWM is enabled, the two
+   * WMMTX matrices through FreeFloatMtx().  Nothing is returned and none of
+   * the pointers may be used by the caller afterwards.
+   */
+
+ FreeFloatVec( w1 );
+ FreeFloatVec( w2 );
+ FreeFloatVec( initverticalw );
+ FreeFloatVec( lastverticalw );
+ FreeFloatVec( midw );
+ FreeFloatVec( midm );
+ FreeFloatVec( midn );
+
+ FreeIntVec( jumpbacki );
+ FreeIntVec( jumpbackj );
+ FreeIntVec( jumpforwi );
+ FreeIntVec( jumpforwj );
+ FreeIntVec( jumpdummi );
+ FreeIntVec( jumpdummj );
+
+ FreeFloatVec( m );
+ FreeIntVec( mp );
+
+ FreeFloatCub( doublework );
+ FreeIntCub( intwork );
+
+#if STOREWM
+ FreeFloatMtx( WMMTX );
+ FreeFloatMtx( WMMTX2 );
+#endif
+}
+
+static double MSalignmm_rec_variousdist( double ***matrices, int icyc, int jcyc, char **seq1, char **seq2, double ***cpmx1s, double ***cpmx2s, int ist, int ien, int jst, int jen, int alloclen, int fulllen1, int fulllen2, char **mseq1, char **mseq2, int depth, double **gapinfo, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp, double headgapfreq1_g, double headgapfreq2_g )
+/* Recursive step of the group-to-group alignment: aligns columns ist..ien of seq1 (icyc sequences) with columns jst..jen of seq2 (jcyc sequences), writes the gapped result into mseq1/mseq2 and returns the accumulated score (0.0 when the seq2 range is empty, -1.0 when interrupted through chudanpt in multithread builds). Short ranges are delegated to MSalignmm_tanni_variousdist; longer ones are split at a mid point found by a forward and a backward DP pass and solved by two recursive calls. Original note: there is a problem with how pre-existing gaps are handled when the score is computed. */
+{
+// int k;
+ int alnlen;
+ double value = 0.0;
+ register int i, j, c;
+ char **aseq1, **aseq2;
+ int ll1, ll2, l, len;
+ int lasti, lastj, imid;
+ int jmid = 0; // by D.Mathog, a guess
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+#if USE_PENALTY_EX
+ double fpenalty_ex = (double)penalty_ex;
+#endif
+ double *wtmp;
+// short *ijppt;
+ int *mpjpt;
+// short **ijp;
+ int *mp;
+ int mpi;
+ double *mjpt, *prept, *curpt;
+ double mi;
+ double *m;
+ double *w1, *w2;
+// double *match;
+ double *initverticalw; /* could be eliminated with some rework */
+ double *lastverticalw; /* could be eliminated with some rework */
+ int ***intwork;
+ double ***doublework;
+// short **shortmtx;
+#if STOREWM
+ double **WMMTX;
+ double **WMMTX2;
+#endif
+ double *midw;
+ double *midm;
+ double *midn;
+ int lgth1, lgth2;
+ double maxwm;
+ int *jumpforwi;
+ int *jumpforwj;
+ int *jumpbacki;
+ int *jumpbackj;
+ int *jumpdummi; // wasteful: only allocated and freed in this function
+ int *jumpdummj = NULL; // by D.Mathog, a guess
+ int jumpi, jumpj = 0; // by D.Mathog, a guess
+ char *gaps;
+ int ijpi, ijpj;
+ double *ogcp1;
+ double *fgcp1;
+ double *ogcp2;
+ double *fgcp2;
+ double firstm;
+ int firstmp;
+#if STOREWM
+ static TLS char ttt1[50000];
+ static TLS char ttt2[50000];
+#endif
+ double *gapfreq1f;
+ double *gapfreq2f;
+ double headgapfreq1;
+ double headgapfreq2;
+
+#if 0
+ int nglen1, nglen2;
+ nglen1 = seqlen( seq1[0] );
+ nglen2 = seqlen( seq2[0] );
+#endif
+
+// fprintf( stderr, "fulllen1 = %d, fulllen2 = %d, headgp = %d, tailgp = %d\n", fulllen1, fulllen2, headgp, tailgp );
+// Offset the six gapinfo vectors so that index 0 refers to column ist (group 1) or jst (group 2) of this sub-problem.
+ ogcp1 = gapinfo[0] + ist;
+ fgcp1 = gapinfo[1] + ist;
+ ogcp2 = gapinfo[2] + jst;
+ fgcp2 = gapinfo[3] + jst;
+ gapfreq1f = gapinfo[4] + ist;
+ gapfreq2f = gapinfo[5] + jst;
+// Head gap frequency: taken from the column just before this range if there is one, otherwise from the caller-supplied value.
+ if( ist > 0 ) headgapfreq1 = gapfreq1f[-1];
+ else headgapfreq1 = headgapfreq1_g;
+ if( jst > 0 ) headgapfreq2 = gapfreq2f[-1];
+ else headgapfreq2 = headgapfreq2_g;
+
+ depth++;
+ reccycle++;
+
+ lgth1 = ien-ist+1;
+ lgth2 = jen-jst+1;
+
+// if( lgth1 < 5 )
+// fprintf( stderr, "\nWARNING: lgth1 = %d\n", lgth1 );
+// if( lgth2 < 5 )
+// fprintf( stderr, "\nWARNING: lgth2 = %d\n", lgth2 );
+//
+
+
+#if STOREWM
+ fprintf( stderr, "==== MSalign (depth=%d, reccycle=%d), ist=%d, ien=%d, jst=%d, jen=%d\n", depth, reccycle, ist, ien, jst, jen );
+ strncpy( ttt1, seq1[0]+ist, lgth1 );
+ strncpy( ttt2, seq2[0]+jst, lgth2 );
+ ttt1[lgth1] = 0;
+ ttt2[lgth2] = 0;
+ fprintf( stderr, "seq1 = %s\n", ttt1 );
+ fprintf( stderr, "seq2 = %s\n", ttt2 );
+#endif
+ if( lgth2 <= 0 ) // what about lgth1 <= 0? -- empty seq2 range: copy seq1 and pad seq2 with lgth1 gap characters
+ {
+// fprintf( stderr, "\n\n==== jimei\n\n" );
+// exit( 1 );
+ for( i=0; i<icyc; i++ )
+ {
+ strncpy( mseq1[i], seq1[i]+ist, lgth1 );
+ mseq1[i][lgth1] = 0;
+ }
+ for( i=0; i<jcyc; i++ )
+ {
+ mseq2[i][0] = 0;
+ for( j=0; j<lgth1; j++ )
+// strcat( mseq2[i], "-" );
+ strcat( mseq2[i], newgapstr );
+ }
+
+// fprintf( stderr, "==== mseq1[0] (%d) = %s\n", depth, mseq1[0] );
+// fprintf( stderr, "==== mseq2[0] (%d) = %s\n", depth, mseq2[0] );
+
+ return( 0.0 );
+ }
+// aseq1/aseq2 receive the sub-alignments: under MEMSAVE they alias mseq1/mseq2, otherwise they are separate buffers.
+#if MEMSAVE
+ aseq1 = AllocateCharMtx( icyc, 0 );
+ aseq2 = AllocateCharMtx( jcyc, 0 );
+ for( i=0; i<icyc; i++ ) aseq1[i] = mseq1[i];
+ for( i=0; i<jcyc; i++ ) aseq2[i] = mseq2[i];
+#else
+ aseq1 = AllocateCharMtx( icyc, lgth1+lgth2+100 );
+ aseq2 = AllocateCharMtx( jcyc, lgth1+lgth2+100 );
+#endif
+
+// fprintf( stderr, "####(s) seq1[0] (%d) = \n%-*.*s\n (a%d-a%d)\n", depth, ien-ist+1, ien-ist+1, seq1[0]+ist, ist, ien );
+// fprintf( stderr, "####(s) seq2[0] (%d) = \n%-*.*s\n (b%d-b%d)\n", depth, jen-jst+1, jen-jst+1, seq2[0]+jst, jst, jen );
+
+// if( lgth1 < DPTANNI && lgth2 < DPTANNI ) // with &&, one length could still be 1, so this is not good
+// if( lgth1 < DPTANNI ) // here too, lgth2 might be too short
+ if( lgth1 < DPTANNI || lgth2 < DPTANNI ) // is this absolutely safe?
+ {
+// fprintf( stderr, "==== Going to _tanni\n" );
+
+ value = MSalignmm_tanni_variousdist( matrices, icyc, jcyc, seq1, seq2, cpmx1s, cpmx2s, ist, ien, jst, jen, alloclen, fulllen1, fulllen2, aseq1, aseq2, gapinfo, headgp, tailgp, headgapfreq1_g, headgapfreq2_g );
+
+
+#if MEMSAVE
+ free( aseq1 );
+ free( aseq2 );
+#else
+ for( i=0; i<icyc; i++ ) strcpy( mseq1[i], aseq1[i] );
+ for( i=0; i<jcyc; i++ ) strcpy( mseq2[i], aseq2[i] );
+
+ FreeCharMtx( aseq1 );
+ FreeCharMtx( aseq2 );
+#endif
+
+// fprintf( stderr, "value = %f\n", value );
+
+ return( value );
+ }
+// fprintf( stderr, "Trying to divide the mtx\n" );
+// From here on neither range is shorter than DPTANNI: allocate work arrays, locate a split point and recurse on both parts.
+ ll1 = ( (int)(lgth1) ) + 100;
+ ll2 = ( (int)(lgth2) ) + 100;
+
+// fprintf( stderr, "ll1,ll2=%d,%d\n", ll1, ll2 );
+
+ w1 = AllocateFloatVec( ll2+2 );
+ w2 = AllocateFloatVec( ll2+2 );
+// match = AllocateFloatVec( ll2+2 );
+ midw = AllocateFloatVec( ll2+2 );
+ midn = AllocateFloatVec( ll2+2 );
+ midm = AllocateFloatVec( ll2+2 );
+ jumpbacki = AllocateIntVec( ll2+2 );
+ jumpbackj = AllocateIntVec( ll2+2 );
+ jumpforwi = AllocateIntVec( ll2+2 );
+ jumpforwj = AllocateIntVec( ll2+2 );
+ jumpdummi = AllocateIntVec( ll2+2 );
+ jumpdummj = AllocateIntVec( ll2+2 );
+
+ initverticalw = AllocateFloatVec( ll1+2 );
+ lastverticalw = AllocateFloatVec( ll1+2 );
+
+ m = AllocateFloatVec( ll2+2 );
+ mp = AllocateIntVec( ll2+2 );
+ gaps = AllocateCharVec( MAX( ll1, ll2 ) + 2 );
+
+ doublework = AllocateFloatCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets );
+
+#if DEBUG
+ fprintf( stderr, "succeeded\n" );
+#endif
+
+#if STOREWM
+ WMMTX = AllocateFloatMtx( ll1, ll2 );
+ WMMTX2 = AllocateFloatMtx( ll1, ll2 );
+#endif
+#if 0
+ shortmtx = AllocateShortMtx( ll1, ll2 );
+
+#if DEBUG
+ fprintf( stderr, "succeeded\n\n" );
+#endif
+
+ ijp = shortmtx;
+#endif
+
+ currentw = w1;
+ previousw = w2;
+
+#if 0
+ match_calc( n_dynamicmtx, initverticalw, cpmx2+jst, cpmx1+ist, 0, lgth1, doublework, intwork, 1 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, 0, lgth2, doublework, intwork, 1 );
+#else
+ fillzero( initverticalw, lgth1 );
+ for( c=0; c<maxdistclass; c++ )
+ match_calc_add( matrices[c], initverticalw, cpmx2s[c]+jst, cpmx1s[c]+ist, 0, lgth1, doublework[c], intwork[c], 1 );
+
+ fillzero( currentw, lgth2 );
+ for( c=0; c<maxdistclass; c++ )
+ match_calc_add( matrices[c], currentw, cpmx1s[c]+ist, cpmx2s[c]+jst, 0, lgth2, doublework[c], intwork[c], 1 );
+#endif
+
+// Add the boundary gap terms (ogcp*/fgcp* weighted by the gap frequencies) to the first column and the first row.
+ for( i=1; i<lgth1+1; i++ )
+ {
+// initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+ initverticalw[i] += ( ogcp1[0] * headgapfreq2 + fgcp1[i-1] * gapfreq2f[0] ) ;
+ }
+ for( j=1; j<lgth2+1; j++ )
+ {
+ currentw[j] += ( ogcp2[0] * headgapfreq1 + fgcp2[j-1] * gapfreq1f[0]) ;
+ }
+
+#if STOREWM
+ WMMTX[0][0] = initverticalw[0];
+ for( i=1; i<lgth1+1; i++ )
+ {
+ WMMTX[i][0] = initverticalw[i];
+ }
+ for( j=1; j<lgth2+1; j++ )
+ {
+ WMMTX[0][j] = currentw[j];
+ }
+#endif
+
+
+ for( j=1; j<lgth2+1; ++j )
+ {
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2f[j-1];
+// m[j] = currentw[j-1];
+ mp[j] = 0;
+ }
+
+ lastverticalw[0] = currentw[lgth2-1];
+// Forward pass: rows 1..imid only (all rows in STOREWM builds); row imid is saved into midw/midm/midn and jumpbacki/jumpbackj.
+ imid = lgth1 * 0.5;
+
+ jumpi = 0; // changed later.
+ lasti = lgth1+1;
+#if STOREWM
+ for( i=1; i<lasti; i++ )
+#else
+ for( i=1; i<=imid; i++ )
+#endif
+ {
+#ifdef enablemultithread
+// fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+ if( chudanpt && *chudanpt != chudanref )
+ {
+// fprintf( stderr, "\n\n## CHUUDAN!!! zenhan\n" );
+ *chudanres = 1;
+ freearrays_rec1_variousdist
+ (
+ w1, w2, initverticalw, lastverticalw, midw, midm, midn,
+ jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj,
+ m, mp,
+ doublework, intwork
+#if STOREWM
+ , WMMTX, WMMTX2
+#endif
+ );
+ freearrays_rec2( gaps, aseq1, aseq2 );
+ return( -1.0 );
+ }
+#endif
+ wtmp = previousw;
+ previousw = currentw;
+ currentw = wtmp;
+
+ previousw[0] = initverticalw[i-1];
+
+#if 0
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
+#else
+ fillzero( currentw, lgth2 );
+ for( c=0; c<maxdistclass; c++ )
+ match_calc_add( matrices[c], currentw, cpmx1s[c]+ist, cpmx2s[c]+jst, i, lgth2, doublework[c], intwork[c], 0 );
+#endif
+ currentw[0] = initverticalw[i];
+
+ m[0] = ogcp1[i];
+#if STOREM /* NOTE(review): spelled STOREM here but STOREWM everywhere else in this function -- possibly a typo; confirm before relying on WMMTX2[i][0] */
+ WMMTX2[i][0] = m[0];
+#endif
+ if( i == imid ) midm[0] = m[0];
+
+ mi = previousw[0] + ogcp2[1] * gapfreq1f[i-1];
+// mi = previousw[0];
+ mpi = 0;
+
+
+// ijppt = ijp[i] + 1;
+ mjpt = m + 1;
+ prept = previousw;
+ curpt = currentw + 1;
+ mpjpt = mp + 1;
+
+
+ lastj = lgth2+1;
+ for( j=1; j<lastj; j++ )
+ {
+
+ wm = *prept;
+
+#if 0
+ fprintf( stderr, "%5.0f->", wm );
+#endif
+ g = mi + fgcp2[j-1] * gapfreq1f[i];
+// g = mi + fpenalty;
+#if 0
+ fprintf( stderr, "%5.0f?", g );
+#endif
+ if( g > wm )
+ {
+ wm = g;
+// *ijppt = -( j - mpi );
+ }
+ g = *prept + ogcp2[j] * gapfreq1f[i-1];
+// g = *prept;
+ if( g >= mi )
+ {
+ mi = g;
+ mpi = j-1;
+ }
+#if USE_PENALTY_EX
+ mi += fpenalty_ex;
+#endif
+
+ g = *mjpt + fgcp1[i-1] * gapfreq2f[j];
+// g = *mjpt + fpenalty;
+#if 0
+ fprintf( stderr, "%5.0f?", g );
+#endif
+ if( g > wm )
+ {
+ wm = g;
+// *ijppt = +( i - *mpjpt );
+ }
+
+
+ g = *prept + ogcp1[i] * gapfreq2f[j-1];
+// g = *prept;
+ if( g >= *mjpt )
+ {
+ *mjpt = g;
+ *mpjpt = i-1;
+ }
+#if USE_PENALTY_EX
+ m[j] += fpenalty_ex;
+#endif
+
+#if 0
+ fprintf( stderr, "%5.0f ", wm );
+#endif
+ *curpt += wm;
+
+#if STOREWM
+ WMMTX[i][j] = *curpt;
+ WMMTX2[i][j] = *mjpt;
+#endif
+
+ if( i == imid ) // wasteful
+ {
+ jumpbackj[j] = *mpjpt; // wasteful; to be consolidated later
+ jumpbacki[j] = mpi; // wasteful; to be consolidated later
+// fprintf( stderr, "jumpbackj[%d] in forward dp is %d\n", j, *mpjpt );
+// fprintf( stderr, "jumpbacki[%d] in forward dp is %d\n", j, mpi );
+ midw[j] = *curpt;
+ midm[j] = *mjpt;
+ midn[j] = mi;
+ }
+
+// fprintf( stderr, "m[%d] = %f\n", j, m[j] );
+ mjpt++;
+ prept++;
+ mpjpt++;
+ curpt++;
+
+ }
+ lastverticalw[i] = currentw[lgth2-1];
+
+#if STOREWM
+ WMMTX2[i][lgth2] = m[lgth2-1];
+#endif
+
+#if 0 // done above
+ if( i == imid )
+ {
+ for( j=0; j<lgth2; j++ ) midw[j] = currentw[j];
+ for( j=0; j<lgth2; j++ ) midm[j] = m[j];
+ }
+#endif
+ }
+// for( j=0; j<lgth2; j++ ) midw[j] = WMMTX[imid][j];
+// for( j=0; j<lgth2; j++ ) midm[j] = WMMTX2[imid][j];
+
+#if 0
+ for( i=0; i<lgth1; i++ )
+ {
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 10.2f ", WMMTX[i][j] );
+ }
+ fprintf( stderr, "\n" );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "WMMTX2 = \n" );
+ for( i=0; i<lgth1; i++ )
+ {
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 10.2f ", WMMTX2[i][j] );
+ }
+ fprintf( stderr, "\n" );
+ }
+ fprintf( stderr, "\n" );
+#endif
+
+// reverse DP: backward pass starting from the last row and last column
+
+#if 0
+ match_calc( n_dynamicmtx, initverticalw, cpmx2+jst, cpmx1+ist, lgth2-1, lgth1, doublework, intwork, 1 );
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, lgth1-1, lgth2, doublework, intwork, 1 );
+#else
+ fillzero( initverticalw, lgth1 );
+ for( c=0; c<maxdistclass; c++ )
+ match_calc_add( matrices[c], initverticalw, cpmx2s[c]+jst, cpmx1s[c]+ist, lgth2-1, lgth1, doublework[c], intwork[c], 1 );
+
+ fillzero( currentw, lgth2 );
+ for( c=0; c<maxdistclass; c++ )
+ match_calc_add( matrices[c], currentw, cpmx1s[c]+ist, cpmx2s[c]+jst, lgth1-1, lgth2, doublework[c], intwork[c], 1 );
+#endif
+
+ for( i=0; i<lgth1-1; i++ )
+ {
+// initverticalw[i] += ( fgcp1[lgth1-1] + ogcp1[i+1] );
+ initverticalw[i] += ( fgcp1[lgth1-1] * gapfreq2f[lgth2] + ogcp1[i+1] * gapfreq2f[lgth2-1] );
+ }
+ for( j=0; j<lgth2-1; j++ )
+ {
+// currentw[j] += ( fgcp2[lgth2-1] + ogcp2[j+1] );
+ currentw[j] += ( fgcp2[lgth2-1] * gapfreq1f[lgth1] + ogcp2[j+1] * gapfreq1f[lgth1-1] );
+ }
+
+#if STOREWM
+ for( i=0; i<lgth1-1; i++ )
+ {
+ WMMTX[i][lgth2-1] += ( fgcp1[lgth1-1] + ogcp1[i+1] );
+ fprintf( stderr, "fgcp1[lgth1-1] + ogcp1[i+1] = %f\n", fgcp1[lgth1-1] + ogcp1[i+1] );
+ }
+ for( j=0; j<lgth2-1; j++ )
+ {
+ WMMTX[lgth1-1][j] += ( fgcp2[lgth2-1] + ogcp2[j+1] );
+ fprintf( stderr, "fgcp2[lgth2-1] + ogcp2[j+1] = %f\n", fgcp2[lgth2-1] + ogcp2[j+1] );
+ }
+#endif
+
+
+
+
+
+
+#if 0
+ for( j=lgth2-1; j>0; --j )
+ {
+ m[j-1] = currentw[j] + fgcp2[lgth2-2];
+// m[j-1] = currentw[j];
+ mp[j] = lgth1-1;
+ }
+#else
+ for( j=lgth2-1; j>-1; --j )
+ {
+ m[j] = currentw[j+1] + fgcp1[lgth1-2] * gapfreq2f[j+1];
+// m[j-1] = currentw[j];
+ mp[j] = lgth1-1;
+ }
+#endif
+
+// for( j=0; j<lgth2; j++ ) m[j] = 0.0;
+ // is m[lgth2-1] needed?
+
+
+// for( i=lgth1-2; i>=imid; i-- )
+ firstm = -9999999.9;
+// firstmp = lgth1-1;
+ firstmp = lgth1;
+ for( i=lgth1-2; i>-1; i-- )
+ {
+#ifdef enablemultithread
+// fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+ if( chudanpt && *chudanpt != chudanref )
+ {
+// fprintf( stderr, "\n\n## CHUUDAN!!! kouhan\n" );
+ *chudanres = 1;
+ freearrays_rec1_variousdist
+ (
+ w1, w2, initverticalw, lastverticalw, midw, midm, midn,
+ jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj,
+ m, mp,
+ doublework, intwork
+#if STOREWM
+ , WMMTX, WMMTX2
+#endif
+ );
+ freearrays_rec2( gaps, aseq1, aseq2 );
+ return( -1.0 );
+ }
+#endif
+ wtmp = previousw;
+ previousw = currentw;
+ currentw = wtmp;
+ previousw[lgth2-1] = initverticalw[i+1];
+#if 0
+ match_calc( n_dynamicmtx, currentw, cpmx1+ist, cpmx2+jst, i, lgth2, doublework, intwork, 0 );
+#else
+ fillzero( currentw, lgth2 );
+ for( c=0; c<maxdistclass; c++ )
+ match_calc_add( matrices[c], currentw, cpmx1s[c]+ist, cpmx2s[c]+jst, i, lgth2, doublework[c], intwork[c], 0 );
+#endif
+
+ currentw[lgth2-1] = initverticalw[i];
+
+// m[lgth2] = fgcp1[i];
+// WMMTX2[i][lgth2] += m[lgth2];
+// fprintf( stderr, "m[] = %f\n", m[lgth2] );
+
+ mi = previousw[lgth2-1] + fgcp2[lgth2-2] * gapfreq1f[i+1];
+// mi = previousw[lgth2-1];
+ mpi = lgth2 - 1;
+
+ mjpt = m + lgth2 - 2;
+ prept = previousw + lgth2 - 1;
+ curpt = currentw + lgth2 - 2;
+ mpjpt = mp + lgth2 - 2;
+
+
+ for( j=lgth2-2; j>-1; j-- )
+ {
+ wm = *prept;
+ ijpi = i+1;
+ ijpj = j+1;
+
+ g = mi + ogcp2[j+1] * gapfreq1f[i];
+// g = mi + fpenalty;
+ if( g > wm )
+ {
+ wm = g;
+ ijpj = mpi;
+ ijpi = i+1;
+ }
+
+ g = *prept + fgcp2[j] * gapfreq1f[i+1];
+// g = *prept;
+ if( g >= mi )
+ {
+// fprintf( stderr, "i,j=%d,%d - renewed! mpi = %d\n", i, j, j+1 );
+ mi = g;
+ mpi = j + 1;
+ }
+
+#if USE_PENALTY_EX
+ mi += fpenalty_ex;
+#endif
+
+// fprintf( stderr, "i,j=%d,%d *mpjpt = %d\n", i, j, *mpjpt );
+ g = *mjpt + ogcp1[i+1] * gapfreq2f[j];
+// g = *mjpt + fpenalty;
+ if( g > wm )
+ {
+ wm = g;
+ ijpi = *mpjpt;
+ ijpj = j+1;
+ }
+
+// if( i == imid )fprintf( stderr, "i,j=%d,%d \n", i, j );
+ g = *prept + fgcp1[i] * gapfreq2f[j+1];
+// g = *prept;
+ if( g >= *mjpt )
+ {
+ *mjpt = g;
+ *mpjpt = i + 1;
+ }
+
+#if USE_PENALTY_EX
+ m[j] += fpenalty_ex;
+#endif
+
+ if( i == jumpi || i == imid - 1 )
+ {
+ jumpforwi[j] = ijpi; // wasteful
+ jumpforwj[j] = ijpj; // wasteful
+// fprintf( stderr, "jumpfori[%d] = %d\n", j, ijpi );
+// fprintf( stderr, "jumpforj[%d] = %d\n", j, ijpj );
+ }
+ if( i == imid ) // wasteful
+ {
+ midw[j] += wm;
+// midm[j+1] += *mjpt + fpenalty; //??????
+ midm[j+1] += *mjpt; //??????
+ }
+ if( i == imid - 1 )
+ {
+// midn[j] += mi + fpenalty; //????
+ midn[j] += mi; //????
+ }
+#if STOREWM
+ WMMTX[i][j] += wm;
+// WMMTX2[i][j+1] += *mjpt + fpenalty;
+ WMMTX2[i][j+1] += *mjpt;
+#endif
+ *curpt += wm;
+
+ mjpt--;
+ prept--;
+ mpjpt--;
+ curpt--;
+ }
+// fprintf( stderr, "adding *mjpt (=%f) to WMMTX2[%d][%d]\n", *mjpt, i, j+1 );
+ g = *prept + fgcp1[i];
+ if( firstm < g )
+ {
+ firstm = g;
+ firstmp = i + 1;
+ }
+#if STOREWM
+ WMMTX2[i][j+1] += firstm;
+#endif
+ if( i == imid ) midm[j+1] += firstm;
+
+
+ if( i == imid - 1 )
+ {
+ maxwm = midw[1]; // NOTE(review): the initial maximum is midw[1] while jmid starts at 0 on the next line -- confirm this pairing is intended
+ jmid = 0;
+// if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm );
+ for( j=2; j<lgth2-1; j++ )
+ {
+ wm = midw[j];
+ if( wm > maxwm )
+ {
+ jmid = j;
+ maxwm = wm;
+ }
+// if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm );
+ }
+ for( j=0; j<lgth2+1; j++ )
+ {
+ wm = midm[j];
+ if( wm > maxwm )
+ {
+ jmid = j;
+ maxwm = wm;
+ }
+// if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm );
+ }
+
+// if( depth == 1 ) fprintf( stderr, "maxwm!! = %f\n", maxwm );
+
+
+// fprintf( stderr, "### imid=%d, jmid=%d\n", imid, jmid );
+ wm = midw[jmid];
+ jumpi = imid-1;
+ jumpj = jmid-1;
+ if( jmid > 0 && midn[jmid-1] > wm ) //060413
+ {
+ jumpi = imid-1;
+ jumpj = jumpbacki[jmid];
+ wm = midn[jmid-1];
+// fprintf( stderr, "rejump (n)\n" );
+ }
+ if( midm[jmid] > wm )
+ {
+ jumpi = jumpbackj[jmid];
+ jumpj = jmid-1;
+ wm = midm[jmid];
+// fprintf( stderr, "rejump (m) jumpi=%d\n", jumpi );
+ }
+
+
+// fprintf( stderr, "--> imid=%d, jmid=%d\n", imid, jmid );
+// fprintf( stderr, "--> jumpi=%d, jumpj=%d\n", jumpi, jumpj );
+#if STOREWM
+ fprintf( stderr, "imid = %d\n", imid );
+ fprintf( stderr, "midn = \n" );
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 7.1f ", midn[j] );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "midw = \n" );
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 7.1f ", midw[j] );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "midm = \n" );
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 7.1f ", midm[j] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+// fprintf( stderr, "maxwm = %f\n", maxwm );
+ }
+ if( i == jumpi ) // first?
+ {
+// fprintf( stderr, "#### FIRST i=%d, jumpi<imid=%d<%d, ist=%d, ien=%d, firstmp=%d, lgth1=%d\n", i, jumpi, imid, ist, ien, firstmp, lgth1 );
+// fprintf( stderr, "#### mark 1\n" );
+// fprintf( stderr, "imid, jumpi = %d,%d\n", imid, jumpi );
+// fprintf( stderr, "jmid, jumpj = %d,%d\n", jmid, jumpj );
+
+ if( jmid == 0 )
+ {
+// fprintf( stderr, "#### CHUI2!\n" );
+ jumpj = 0; jmid = 1;
+#if 0 // used up to v6.823
+ jumpi = firstmp-1;
+ imid = firstmp;
+#endif
+#if 0
+ jumpi = 0;
+ imid = 1;
+#else
+// if( 1 || firstmp > 100 ) // why 100?
+ if( imid < firstmp-1 ) // why 100?
+ {
+ jumpi = firstmp;
+ imid = firstmp+1;
+ }
+#if 0
+ else
+ {
+ jumpi = 0;
+ imid = 1;
+ }
+#endif
+#endif
+ }
+
+#if 0
+ else if( jmid == lgth2 )
+ {
+ fprintf( stderr, "CHUI1!\n" );
+ jumpi=0; jumpj=0;
+ imid=jumpforwi[0]; jmid=lgth2-1;
+ }
+#else // 060414
+ else if( jmid >= lgth2 )
+ {
+// fprintf( stderr, "CHUI1!\n" );
+ jumpi=imid-1; jmid=lgth2;
+ jumpj = lgth2-1;
+ }
+#endif
+ else
+ {
+// fprintf( stderr, "#### CHUI3!\n" );
+ imid = jumpforwi[jumpj];
+ jmid = jumpforwj[jumpj];
+ if( imid == jumpi ) jumpi = imid-1;
+ }
+#if 0
+ fprintf( stderr, "jumpi -> %d\n", jumpi );
+ fprintf( stderr, "jumpj -> %d\n", jumpj );
+ fprintf( stderr, "imid -> %d\n", imid );
+ fprintf( stderr, "jmid -> %d\n", jmid );
+#endif
+// fprintf( stderr, "#### FINAL i=%d, jumpi<imid=%d<%d, ist=%d, ien=%d, firstmp=%d\n", i, jumpi, imid, ist, ien, firstmp );
+
+#if STOREWM
+ break;
+#else
+ break;
+#endif
+ }
+ }
+
+// fprintf( stderr, "imid = %d, but jumpi = %d\n", imid, jumpi );
+// fprintf( stderr, "jmid = %d, but jumpj = %d\n", jmid, jumpj );
+
+// for( j=0; j<lgth2; j++ ) midw[j] += currentw[j];
+// for( j=0; j<lgth2; j++ ) midm[j] += m[j+1];
+// for( j=0; j<lgth2; j++ ) midw[j] += WMMTX[imid][j];
+// for( j=0; j<lgth2; j++ ) midw[j] += WMMTX[imid][j];
+
+
+#if STOREWM
+ fprintf( stderr, "WMMTX = \n" );
+ for( i=0; i<lgth1; i++ )
+ {
+ fprintf( stderr, "%d ", i );
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 7.2f ", WMMTX[i][j] );
+ }
+ fprintf( stderr, "\n" );
+ }
+// fprintf( stderr, "WMMTX2 = (p = %f)\n", fpenalty );
+ for( i=0; i<lgth1; i++ )
+ {
+ fprintf( stderr, "%d ", i );
+ for( j=0; j<lgth2+1; j++ )
+ {
+ fprintf( stderr, "% 7.2f ", WMMTX2[i][j] );
+ }
+ fprintf( stderr, "\n" );
+ }
+
+ fprintf( stderr, "jumpbacki = \n" );
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 7d ", jumpbacki[j] );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "jumpbackj = \n" );
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 7d ", jumpbackj[j] );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "jumpforwi = \n" );
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 7d ", jumpforwi[j] );
+ }
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "jumpforwj = \n" );
+ for( j=0; j<lgth2; j++ )
+ {
+ fprintf( stderr, "% 7d ", jumpforwj[j] );
+ }
+ fprintf( stderr, "\n" );
+
+
+#endif
+
+
+// Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp, icyc, jcyc );
+
+#if 0 // may be needed
+ resultlen = strlen( mseq1[0] );
+ if( alloclen < resultlen || resultlen > N )
+ {
+ fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
+ ErrorExit( "LENGTH OVER!\n" );
+ }
+#endif
+
+
+
+#if 0
+ fprintf( stderr, "jumpi = %d, imid = %d\n", jumpi, imid );
+ fprintf( stderr, "jumpj = %d, jmid = %d\n", jumpj, jmid );
+
+ fprintf( stderr, "imid = %d\n", imid );
+ fprintf( stderr, "jmid = %d\n", jmid );
+#endif
+
+ freearrays_rec1_variousdist
+ (
+ w1, w2, initverticalw, lastverticalw, midw, midm, midn,
+ jumpbacki, jumpbackj, jumpforwi, jumpforwj, jumpdummi, jumpdummj,
+ m, mp,
+ doublework, intwork
+#if STOREWM
+ , WMMTX, WMMTX2
+#endif
+ );
+// The DP work arrays are released before recursing; gaps and aseq1/aseq2 stay alive until freearrays_rec2 at the end.
+
+// fprintf( stderr, "==== calling myself (first)\n" );
+
+ value = MSalignmm_rec_variousdist( matrices, icyc, jcyc, seq1, seq2, cpmx1s, cpmx2s, ist, ist+jumpi, jst, jst+jumpj, alloclen, fulllen1, fulllen2, aseq1, aseq2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp, headgapfreq1_g, headgapfreq2_g ); // interruption (chudan) is not passed down yet
+#if 0
+ fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] );
+ fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] );
+#endif
+#if MEMSAVE
+#else
+ for( i=0; i<icyc; i++ ) strcpy( mseq1[i], aseq1[i] );
+ for( i=0; i<jcyc; i++ ) strcpy( mseq2[i], aseq2[i] );
+#endif
+
+// fprintf( stderr, "====(f) aseq1[0] (%d) = %s (%d-%d)\n", depth, aseq1[0], ist, ien );
+// fprintf( stderr, "====(f) aseq2[0] (%d) = %s (%d-%d)\n", depth, aseq2[0], jst, jen );
+// Columns of group 2 lying strictly between jumpj and jmid are emitted against gap characters in group 1 (and likewise for group 1 below).
+ len = strlen( mseq1[0] );
+// fprintf( stderr, "len = %d\n", len );
+ l = jmid - jumpj - 1;
+// fprintf( stderr, "l=%d\n", l );
+ if( l > 0 )
+ {
+// for( i=0; i<l; i++ ) gaps[i] = '-'; gaps[i] = 0;
+ for( i=0; i<l; i++ ) gaps[i] = *newgapstr; gaps[i] = 0; // the terminator is written once, after the loop (i == l)
+ for( i=0; i<icyc; i++ )
+ {
+ strcat( mseq1[i], gaps );
+ mseq1[i][len+l] = 0;
+ }
+ for( j=0; j<jcyc; j++ )
+ {
+ strncat( mseq2[j], seq2[j]+jst+jumpj+1, l );
+ mseq2[j][len+l] = 0;
+ }
+// fprintf( stderr, "penalizing (2) .. %f(%d), %f(%d)\n", ogcp2[jumpj+1], jumpj+1, fgcp2[jmid-1], jmid-1 );
+ value += ( ogcp2[jumpj+1] + fgcp2[jmid-1] );
+// value += fpenalty;
+ }
+ len = strlen( mseq1[0] );
+ l = imid - jumpi - 1;
+// fprintf( stderr, "l=%d\n", l );
+ if( l > 0 )
+ {
+// for( i=0; i<l; i++ ) gaps[i] = '-'; gaps[i] = 0;
+ for( i=0; i<l; i++ ) gaps[i] = *newgapstr; gaps[i] = 0; // the terminator is written once, after the loop (i == l)
+ for( i=0; i<icyc; i++ )
+ {
+ strncat( mseq1[i], seq1[i]+ist+jumpi+1, l );
+ mseq1[i][len+l] = 0;
+ }
+ for( j=0; j<jcyc; j++ )
+ {
+ strcat( mseq2[j], gaps );
+ mseq2[j][len+l] = 0;
+ }
+
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "ogcp1[%d] = %f\n", i, ogcp1[i] );
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "fgcp1[%d] = %f\n", i, fgcp1[i] );
+
+
+// fprintf( stderr, "penalizing (1) .. ogcp1[%d] = %f, fgcp1[%d] = %f\n", jumpi+1, ogcp1[jumpi+1], imid-1, fgcp1[imid-1] );
+ value += ( ogcp1[jumpi+1] + fgcp1[imid-1] );
+// value += fpenalty;
+ }
+#if 0
+ for( i=0; i<icyc; i++ ) fprintf( stderr, "after gapfill mseq1[%d]=%s\n", i, mseq1[i] );
+ for( i=0; i<jcyc; i++ ) fprintf( stderr, "after gapfill mseq2[%d]=%s\n", i, mseq2[i] );
+#endif
+
+// fprintf( stderr, "==== calling myself (second)\n" );
+
+#if MEMSAVE
+ alnlen = strlen( aseq1[0] );
+ for( i=0; i<icyc; i++ ) aseq1[i] += alnlen;
+ for( i=0; i<jcyc; i++ ) aseq2[i] += alnlen;
+#endif
+
+ value += MSalignmm_rec_variousdist( matrices, icyc, jcyc, seq1, seq2, cpmx1s, cpmx2s, ist+imid, ien, jst+jmid, jen, alloclen, fulllen1, fulllen2, aseq1, aseq2, depth, gapinfo, NULL, 0, NULL, headgp, tailgp, headgapfreq1_g, headgapfreq2_g ); // interruption (chudan) is not passed down yet
+#if 0
+ fprintf( stderr, "aseq1[0] = %s\n", aseq1[0] );
+ fprintf( stderr, "aseq2[0] = %s\n", aseq2[0] );
+#endif
+
+
+
+#if DEBUG
+ if( value - maxwm > 1 || maxwm - value > 1 )
+ {
+ fprintf( stderr, "WARNING value = %f, but maxwm = %f\n", value, maxwm );
+ for( i=0; i<icyc; i++ )
+ {
+ fprintf( stderr, ">1-%d\n%s\n", i, mseq1[i] );
+ fprintf( stderr, "%s\n", aseq1[i] );
+ }
+ for( i=0; i<jcyc; i++ )
+ {
+ fprintf( stderr, ">2-%d\n%s\n", i, mseq2[i] );
+ fprintf( stderr, "%s\n", aseq2[i] );
+ }
+
+// exit( 1 );
+ }
+ else
+ {
+ fprintf( stderr, "value = %.0f, maxwm = %.0f -> ok\n", value, maxwm );
+ }
+#endif
+
+#if MEMSAVE
+#else
+ for( i=0; i<icyc; i++ ) strcat( mseq1[i], aseq1[i] );
+ for( i=0; i<jcyc; i++ ) strcat( mseq2[i], aseq2[i] );
+#endif
+
+// fprintf( stderr, "====(s) aseq1[0] (%d) = \n%s\n (a%d-a%d)\n", depth, mseq1[0], ist, ien );
+// fprintf( stderr, "====(s) aseq2[0] (%d) = \n%s\n (b%d-b%d)\n", depth, mseq2[0], jst, jen );
+
+ freearrays_rec2( gaps, aseq1, aseq2 );
+
+#if 0
+ if( seqlen( seq1[0] ) != nglen1 )
+ {
+ fprintf( stderr, "bug! hairetsu ga kowareta! (nglen1) seqlen(seq1[0])=%d but nglen1=%d\n", seqlen( seq1[0] ), nglen1 );
+ fprintf( stderr, "seq1[0] = %s\n", seq1[0] );
+ exit( 1 );
+ }
+ else
+ fprintf( stderr, "nglen1 is ok in _rec\n" );
+ if( seqlen( seq2[0] ) != nglen2 )
+ {
+ fprintf( stderr, "bug! hairetsu ga kowareta! (nglen2) seqlen(seq2[0])=%d but nglen2=%d\n", seqlen( seq2[0] ), nglen2 );
+ exit( 1 );
+ }
+ else
+ fprintf( stderr, "nglen2 is ok in _rec\n" );
+#endif
+
+ return( value );
+}
+
+static void freearrays_variousdist(
+ double *ogcp1,
+ double *ogcp2,
+ double *fgcp1,
+ double *fgcp2,
+ double ***cpmx1s,
+ double ***cpmx2s,
+ double *gapfreq1f,
+ double *gapfreq2f,
+ double **gapinfo,
+ char **mseq1,
+ char **mseq2
+)
+{
+ FreeCharMtx( mseq2 );
+ FreeCharMtx( mseq1 );
+// gapinfo is released with plain free(): only the pointer table itself is freed here, the vectors are freed by the calls below.
+ free( (void *)gapinfo );
+ FreeFloatVec( gapfreq2f );
+ FreeFloatVec( gapfreq1f );
+ FreeFloatCub( cpmx2s );
+ FreeFloatCub( cpmx1s );
+ FreeFloatVec( fgcp2 );
+ FreeFloatVec( fgcp1 );
+ FreeFloatVec( ogcp2 );
+ FreeFloatVec( ogcp1 );
+}
+
+
+double MSalignmm_variousdist( double **pairoffset, double ***matrices, double **dummy_mtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
+/* Entry point: aligns group seq1 (icyc sequences) with group seq2 (jcyc sequences) using one scoring matrix per distance class, overwrites seq1/seq2 in place with the aligned sequences and returns the score from MSalignmm_rec_variousdist (-1.0 when interrupted in multithread builds). sgap1/sgap2/egap1/egap2 are optional per-sequence start/end gap flags; when sgap1 is NULL the st_* counters are used instead. Exits the process if the sequences in a group differ in length or if seqlen() changes across the alignment. Original note: there is a problem with how pre-existing gaps are handled when the score is computed. */
+{
+// fprintf( stderr, "IN MSalignmm_variousdist\n" );
+// int k;
+ int i, j, c;
+ int ll1, ll2;
+ int lgth1, lgth2;
+ double wm = 0.0; /* int ?????? */
+ char **mseq1;
+ char **mseq2;
+ double *ogcp1;
+ double *ogcp2;
+ double *fgcp1;
+ double *fgcp2;
+ double ***cpmx1s;
+ double ***cpmx2s;
+ double **gapinfo;
+ double fpenalty = (double)penalty;
+ double *gapfreq1f;
+ double *gapfreq2f;
+ int nglen1, nglen2;
+ double headgapfreq1;
+ double headgapfreq2;
+
+#if 0
+ fprintf( stderr, "eff in SA+++align\n" );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, "eff1[%d] = %f\n", i, eff1[i] );
+#endif
+// Remember what seqlen() reports now so that it can be re-checked after the alignment has been copied back.
+ nglen1 = seqlen( seq1[0] );
+ nglen2 = seqlen( seq2[0] );
+
+#if 0
+ fprintf( stderr, "\n" );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, "seq1[%d] at root = %s\n", i, seq1[i] );
+ for( j=0; j<jcyc; j++ ) fprintf( stderr, "seq2[%d] at root = %s\n", j, seq2[j] );
+ fprintf( stderr, "\n" );
+#endif
+
+ lgth1 = strlen( seq1[0] );
+ lgth2 = strlen( seq2[0] );
+
+ ll1 = ( (int)(lgth1) ) + 100;
+ ll2 = ( (int)(lgth2) ) + 100;
+// Output buffers for both aligned groups, followed by the per-column gap vectors handed to the recursion through gapinfo.
+ mseq1 = AllocateCharMtx( icyc, ll1+ll2 );
+ mseq2 = AllocateCharMtx( jcyc, ll1+ll2 );
+
+ gapinfo = AllocateFloatMtx( 6, 0 );
+ ogcp1 = AllocateFloatVec( ll1+2 );
+ ogcp2 = AllocateFloatVec( ll2+2 );
+ fgcp1 = AllocateFloatVec( ll1+2 );
+ fgcp2 = AllocateFloatVec( ll2+2 );
+
+
+ cpmx1s = AllocateFloatCub( maxdistclass, ll1+2, nalphabets+1 );
+ cpmx2s = AllocateFloatCub( maxdistclass, ll2+2, nalphabets+1 );
+
+ gapfreq1f = AllocateFloatVec( ll1+2 ); // must be filled with 0.0
+ gapfreq2f = AllocateFloatVec( ll2+2 ); // must be filled with 0.0
+// Every sequence within a group must have the same length as the first one; otherwise abort.
+ for( i=0; i<icyc; i++ )
+ {
+ if( strlen( seq1[i] ) != lgth1 )
+ {
+ fprintf( stderr, "i = %d / %d\n", i, icyc );
+ fprintf( stderr, "bug! hairetsu ga kowareta!\n" );
+ exit( 1 );
+ }
+ }
+ for( j=0; j<jcyc; j++ )
+ {
+ if( strlen( seq2[j] ) != lgth2 )
+ {
+ fprintf( stderr, "j = %d / %d\n", j, jcyc );
+ fprintf( stderr, "bug! hairetsu ga kowareta!\n" );
+ exit( 1 );
+ }
+ }
+// One cpmx per distance class, computed with that class's weights (eff1s[c] / eff2s[c]).
+ for( c=0; c<maxdistclass; c++ )
+ {
+ MScpmx_calc_new( seq1, cpmx1s[c], eff1s[c], lgth1, icyc );
+ MScpmx_calc_new( seq2, cpmx2s[c], eff2s[c], lgth2, jcyc );
+ }
+
+
+#if 1
+// With explicit start/end gap flags use the new_* counters, otherwise fall back to the st_* counters.
+ if( sgap1 )
+ {
+ new_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1, sgap1 );
+ new_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 );
+ new_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap1 ); // group 1 must use its own end-gap flags (egap1 has icyc entries)
+ new_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 );
+ outgapcount( &headgapfreq1, icyc, sgap1, eff1 );
+ outgapcount( &headgapfreq2, jcyc, sgap2, eff2 );
+ outgapcount( gapfreq1f+lgth1, icyc, egap1, eff1 );
+ outgapcount( gapfreq2f+lgth2, jcyc, egap2, eff2 );
+ }
+ else
+ {
+ st_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1 );
+ st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 );
+ st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 );
+ st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 );
+ headgapfreq1 = 0.0;
+ headgapfreq2 = 0.0;
+ gapfreq1f[lgth1] = 0.0;
+ gapfreq2f[lgth2] = 0.0;
+ }
+// legacygapcost == 0: weight by (1 - gap frequency) per column; otherwise use a constant weight of 1.0.
+ if( legacygapcost == 0 )
+ {
+ gapcountf( gapfreq1f, seq1, icyc, eff1, lgth1 );
+ gapcountf( gapfreq2f, seq2, jcyc, eff2, lgth2 );
+ for( i=0; i<lgth1+1; i++ ) gapfreq1f[i] = 1.0 - gapfreq1f[i];
+ for( i=0; i<lgth2+1; i++ ) gapfreq2f[i] = 1.0 - gapfreq2f[i];
+ headgapfreq1 = 1.0 - headgapfreq1;
+ headgapfreq2 = 1.0 - headgapfreq2;
+ }
+ else
+ {
+ for( i=0; i<lgth1+1; i++ ) gapfreq1f[i] = 1.0;
+ for( i=0; i<lgth2+1; i++ ) gapfreq2f[i] = 1.0;
+ headgapfreq1 = 1.0;
+ headgapfreq2 = 1.0;
+ }
+
+#if 1
+ for( i=0; i<lgth1; i++ )
+ {
+ ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty * ( gapfreq1f[i] );
+ fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty * ( gapfreq1f[i] );
+// fprintf( stderr, "fgcp1[%d] = %f\n", i, fgcp1[i] );
+ }
+ for( i=0; i<lgth2; i++ )
+ {
+ ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty * ( gapfreq2f[i] );
+ fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty * ( gapfreq2f[i] );
+// fprintf( stderr, "fgcp2[%d] = %f\n", i, fgcp2[i] );
+ }
+#else
+ for( i=0; i<lgth1; i++ )
+ {
+ ogcp1[i] = 0.5 * fpenalty;
+ fgcp1[i] = 0.5 * fpenalty;
+ }
+ for( i=0; i<lgth2; i++ )
+ {
+ ogcp2[i] = 0.5 * fpenalty;
+ fgcp2[i] = 0.5 * fpenalty;
+ }
+#endif
+
+ gapinfo[0] = ogcp1;
+ gapinfo[1] = fgcp1;
+ gapinfo[2] = ogcp2;
+ gapinfo[3] = fgcp2;
+ gapinfo[4] = gapfreq1f;
+ gapinfo[5] = gapfreq2f;
+#endif
+
+#if 0
+ fprintf( stdout, "in MSalignmm.c\n" );
+ for( i=0; i<icyc; i++ )
+ {
+ fprintf( stdout, ">%d of GROUP1\n", i );
+ fprintf( stdout, "%s\n", seq1[i] );
+ }
+ for( i=0; i<jcyc; i++ )
+ {
+ fprintf( stdout, ">%d of GROUP2\n", i );
+ fprintf( stdout, "%s\n", seq2[i] );
+ }
+ fflush( stdout );
+#endif
+
+ wm = MSalignmm_rec_variousdist( matrices, icyc, jcyc, seq1, seq2, cpmx1s, cpmx2s, 0, lgth1-1, 0, lgth2-1, alloclen, lgth1, lgth2, mseq1, mseq2, 0, gapinfo, chudanpt, chudanref, chudanres, headgp, tailgp, headgapfreq1, headgapfreq2 );
+#ifdef enablemultithread
+ if( chudanres && *chudanres )
+ {
+// fprintf( stderr, "\n\n## CHUUDAN!!! relay\n" );
+ *chudanres = 1;
+ freearrays_variousdist( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1s, cpmx2s, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2 );
+ return( -1.0 );
+ }
+#endif
+
+#if 0
+ fprintf( stderr, "\n" );
+ fprintf( stderr, " seq1[0] = %s\n", seq1[0] );
+ fprintf( stderr, " seq2[0] = %s\n", seq2[0] );
+ fprintf( stderr, "mseq1[0] = %s\n", mseq1[0] );
+ fprintf( stderr, "mseq2[0] = %s\n", mseq2[0] );
+ fprintf( stderr, "\n" );
+#endif
+
+// fprintf( stderr, "wm = %f\n", wm );
+
+// Copy the aligned rows back: the caller's seq1/seq2 buffers are overwritten in place.
+ for( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );
+ for( i=0; i<jcyc; i++ ) strcpy( seq2[i], mseq2[i] );
+
+ if( seqlen( seq1[0] ) != nglen1 )
+ {
+ fprintf( stderr, "bug! hairetsu ga kowareta! (nglen1) seqlen(seq1[0])=%d but nglen1=%d\n", seqlen( seq1[0] ), nglen1 );
+ fprintf( stderr, "seq1[0] = %s\n", seq1[0] );
+ exit( 1 );
+ }
+ if( seqlen( seq2[0] ) != nglen2 )
+ {
+ fprintf( stderr, "bug! hairetsu ga kowareta! (nglen2) seqlen(seq2[0])=%d but nglen2=%d\n", seqlen( seq2[0] ), nglen2 );
+ exit( 1 );
+ }
+
+
+ freearrays_variousdist( ogcp1, ogcp2, fgcp1, fgcp2, cpmx1s, cpmx2s, gapfreq1f, gapfreq2f, gapinfo, mseq1, mseq2 );
+
+ lgth1 = strlen( seq1[0] );
+ lgth2 = strlen( seq2[0] );
+ for( i=0; i<icyc; i++ )
+ {
+ if( strlen( seq1[i] ) != lgth1 )
+ {
+ fprintf( stderr, "i = %d / %d\n", i, icyc );
+ fprintf( stderr, "hairetsu ga kowareta (end of MSalignmm) !\n" );
+ exit( 1 );
+ }
+ }
+ for( j=0; j<jcyc; j++ )
+ {
+ if( strlen( seq2[j] ) != lgth2 )
+ {
+ fprintf( stderr, "j = %d / %d\n", j, jcyc );
+ fprintf( stderr, "hairetsu ga kowareta (end of MSalignmm) !\n" );
+ exit( 1 );
+ }
+ }
+
+#if 0
+ fprintf( stderr, "\n" );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, " seq1[i] = %s\n", seq1[i] );
+ for( j=0; j<jcyc; j++ ) fprintf( stderr, " seq2[j] = %s\n", seq2[j] );
+ fprintf( stderr, "\n" );
+#endif
+
return( wm );
}
LIBDIR = $(PREFIX)/libexec/mafft
BINDIR = $(PREFIX)/bin
MANDIR = $(PREFIX)/share/man/man1
+DESTDIR =
#MNO_CYGWIN = -mno-cygwin
-#ENABLE_MULTITHREAD = -Denablemultithread
-#Uncomment this to enable multithreading (linux only)
+ENABLE_MULTITHREAD = -Denablemultithread
+# Comment out the above line if your compiler
+# does not support TLS (thread-local storage).
CC = gcc
-CFLAGS = -O3
-#CFLAGS = -O0 -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3
+CFLAGS = -O3 -std=c99
+#CFLAGS = -O3 -fPIC
+# add -fPIC when building .so files
+
+#CC = icc
+#CFLAGS = -fast
+# if you have icc, use this.
+
+#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3
+#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3 -fprofile-arcs -ftest-coverage
+#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 # for shark, valgrind
+#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3 -lprofiler # ?
+
MYCFLAGS = $(MNO_CYGWIN) $(ENABLE_MULTITHREAD) $(CFLAGS)
-ifeq ($(ENABLE_MULTITHREAD),-Denablemultithread)
+ifdef ENABLE_MULTITHREAD
LIBS = -lm -lpthread
else
LIBS = -lm
INSTALL = install
PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \
- pair2hat3s multi2hat3s rnatest pairash \
+ multi2hat3s pairash addsingle \
splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \
- seq2regtable regtable2seq score getlag dndpre dndpre2 setcore replaceu restoreu
-SCRIPTS = mafft mafft-homologs.rb
+ seq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version
+SOS = libdisttbfast.so
+DLLS = libdisttbfast.dll
+DYLIBS = libdisttbfast.dylib
+
+PERLPROGS = mafftash_premafft.pl seekquencer_premafft.pl
+SCRIPTS = mafft mafft-homologs.rb mafft-sparsecore.rb
+OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o
OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o
OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o
OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o
OBJCONTRAFOLDWRAP = mtxutl.o io.o contrafoldwrap.o constants.o defs.o mltaln9.o
OBJMULTI2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \
multi2hat3s.o defs.o fft.o fftFunctions.o
-OBJPAIR2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \
- pair2hat3s.o defs.o fft.o fftFunctions.o
-OBJRNATEST = mtxutl.o io.o mltaln9.o tddis.o constants.o Lalignmm.o \
- rna.o rnatest.o defs.o fft.o fftFunctions.o
-OBJPAIRASH = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
- Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genGalign11.o genalign11.o Lalign11.o SAalignmm.o \
+OBJPAIRASH = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
+ Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \
pairash.o defs.o fft.o fftFunctions.o
-OBJPAIRLOCALALIGN = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
- Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genGalign11.o genalign11.o Lalign11.o SAalignmm.o \
- pairlocalalign.o defs.o fft.o fftFunctions.o
-OBJDUMMY = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJPAIRLOCALALIGN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
+ Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \
+ pairlocalalignmain.o pairlocalalign.o defs.o fft.o fftFunctions.o
+OBJDUMMY = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
disttbfast_dummy.o dummy.o defs.o fft.o fftFunctions.o
-OBJSPLITFROMALN = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJSPLITFROMALN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
Lalign11.o splitfromaln.o defs.o fft.o fftFunctions.o
-OBJSPLITTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJSPLITTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
- Lalign11.o splittbfast.o defs.o fft.o fftFunctions.o
-OBJSPLITTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+ Lalign11.o splittbfast.o defs.o fft.o fftFunctions.o
+OBJSPLITTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
Lalign11.o splittbfast2.o defs.o fft.o fftFunctions.o
-OBJSPLITTBFASTP = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJSPLITTBFASTP = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
Lalign11.o defs.o fft.o fftFunctions.o
-OBJDISTTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJDISTTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
disttbfast.o defs.o fft.o fftFunctions.o addfunctions.o
-OBJTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJMAKEDIRECTIONLIST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ Lalign11.o makedirectionlist.o defs.o fft.o fftFunctions.o addfunctions.o
+OBJTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o \
- tbfast.o defs.o fft.o fftFunctions.o addfunctions.o
-OBJTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o MSalignmm.o \
+ tbfast.o defs.o fft.o fftFunctions.o addfunctions.o \
+ pairlocalalign.o genalign11.o MSalign11.o Lalign11.o
+OBJADDSINGLE = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \
+ addsingle.o defs.o fft.o fftFunctions.o addfunctions.o
+OBJTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o MSalignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o \
tbfast2.o defs.o fft.o fftFunctions.o
-OBJSETCORE = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJSETCORE = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
setcore.o defs.o fft.o fftFunctions.o
-OBJTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o fftFunctions.o fft.o \
tditeration.o tditr.o defs.o SAalignmm.o treeOperation.o
-OBJDVTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJDVTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
Falign.o Falign_localhom.o Galign11.o MSalignmm.o fftFunctions.o fft.o \
- tditeration.o dvtditr.o defs.o SAalignmm.o treeOperation.o
-OBJGETLAG = mtxutl.o io.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
- Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ tditeration.o dvtditr.o defs.o SAalignmm.o treeOperation.o addfunctions.o
+OBJGETLAG = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o Dalignmm.o \
getlag.o defs.o fft.o fftFunctions.o
OBJGAPFILL = mtxutl.o io.o constants.o gapfill.o defs.o
OBJDNDFAST5 = dndfast5.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
OBJTRIPLET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet5.o defs.o
OBJOCTET4 = io.o constants.o mtxutl.o mltaln9.o tddis.o octet4.o defs.o
OBJDNDPRE = dndpre.o io.o constants.o mtxutl.o mltaln9.o defs.o
-OBJDNDPRESCORE = dndpre2.o io.o constants.o mtxutl.o mltaln9.o defs.o
-OBJGALN = io.o mtxutl.o mltaln9.o tddis.o constants.o partQalignmm.o partSalignmm.o MSalignmm.o Lalignmm.o rna.o Salignmm.o Ralignmm.o Qalignmm.o Halignmm.o \
+OBJGALN = io.o mtxutl.o mltaln9.o tddis.o constants.o partSalignmm.o MSalignmm.o Lalignmm.o rna.o Salignmm.o Dalignmm.o \
SAalignmm.o Galign11.o Falign.o Falign_localhom.o fftFunctions.o fft.o mafft-profile.o defs.o
OBJSCORE = io.o mtxutl.o mltaln9.o score.o constants.o defs.o
-HEADER = mltaln.h mtxutl.h
+HEADER = mltaln.h mtxutl.h mafft.h
FFTHEADER = fft.h
MANPAGES = mafft.1 mafft-homologs.1
-all : $(PROGS) $(SCRIPTS)
+
+all : $(PERLPROGS) $(PROGS) $(SCRIPTS)
cp $(SCRIPTS) ../scripts
chmod 755 ../scripts/*
- cp $(PROGS) ../binaries
+ cp $(PERLPROGS) $(PROGS) ../binaries
chmod 755 ../binaries/*
cp $(MANPAGES) ../binaries
@echo done.
+sos : $(SOS)
+dylibs : $(DYLIBS)
+dlls : $(DLLS)
+
univscript: univscript.tmpl Makefile
sed "s:_PROGS:$(PROGS):" univscript.tmpl > univscript
-mafft: mafft.tmpl
+mafft: mafft.tmpl mltaln.h
sed "s:_LIBDIR:$(LIBDIR):" mafft.tmpl > mafft
mafft-homologs.rb: mafft-homologs.tmpl
# cp mafft-homologs.tmpl mafft-homologs.rb
sed "s:_BINDIR:$(BINDIR):" mafft-homologs.tmpl > mafft-homologs.rb
+mafft-sparsecore.rb: mafft-sparsecore.tmpl
+# cp mafft-sparsecore.tmpl mafft-sparsecore.rb
+ sed "s:_BINDIR:$(BINDIR):" mafft-sparsecore.tmpl > mafft-sparsecore.rb
+
mltaln.h : functions.h
touch mltaln.h
+version : version.c mltaln.h
+ $(CC) -o $@ version.c $(MYCFLAGS) $(LDFLAGS)
+
tbfast : $(OBJTBFAST)
- $(CC) -o $@ $(OBJTBFAST) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+addsingle : $(OBJADDSINGLE)
+ $(CC) -o $@ $(OBJADDSINGLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
tbfast2 : $(OBJTBFAST2)
- $(CC) -o $@ $(OBJTBFAST2) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJTBFAST2) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
disttbfast : $(OBJDISTTBFAST)
- $(CC) -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+libdisttbfast.so : $(OBJDISTTBFAST)
+ $(CC) -shared -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+libdisttbfast.dylib : $(OBJDISTTBFAST)
+ $(CC) -dynamiclib -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+libdisttbfast.dll : $(OBJDISTTBFAST)
+ $(CC) -shared -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+makedirectionlist : $(OBJMAKEDIRECTIONLIST)
+ $(CC) -o $@ $(OBJMAKEDIRECTIONLIST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
splittbfast : $(OBJSPLITTBFAST)
- $(CC) -o $@ $(OBJSPLITTBFAST) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSPLITTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
splitfromaln : $(OBJSPLITFROMALN)
- $(CC) -o $@ $(OBJSPLITFROMALN) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSPLITFROMALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
splittbfast2 : $(OBJSPLITTBFAST2)
- $(CC) -o $@ $(OBJSPLITTBFAST2) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSPLITTBFAST2) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dummy : $(OBJDUMMY)
- $(CC) -o $@ $(OBJDUMMY) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDUMMY) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
setcore : $(OBJSETCORE)
- $(CC) -o $@ $(OBJSETCORE) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSETCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
countlen : $(OBJCOUNTLEN)
- $(CC) -o $@ $(OBJCOUNTLEN) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJCOUNTLEN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
seq2regtable : $(OBJSEQ2REGTABLE)
- $(CC) -o $@ $(OBJSEQ2REGTABLE) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSEQ2REGTABLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
regtable2seq : $(OBJREGTABLE2SEQ)
- $(CC) -o $@ $(OBJREGTABLE2SEQ) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJREGTABLE2SEQ) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+setdirection : $(OBJSETDIRECTION)
+ $(CC) -o $@ $(OBJSETDIRECTION) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
replaceu : $(OBJREPLACEU)
- $(CC) -o $@ $(OBJREPLACEU) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJREPLACEU) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
restoreu : $(OBJRESTOREU)
- $(CC) -o $@ $(OBJRESTOREU) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJRESTOREU) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
f2cl : $(OBJF2CL)
- $(CC) -o $@ $(OBJF2CL) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJF2CL) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
mccaskillwrap : $(OBJMCCASKILLWRAP)
- $(CC) -o $@ $(OBJMCCASKILLWRAP) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJMCCASKILLWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
contrafoldwrap : $(OBJCONTRAFOLDWRAP)
- $(CC) -o $@ $(OBJCONTRAFOLDWRAP) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJCONTRAFOLDWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
pairlocalalign : $(OBJPAIRLOCALALIGN)
- $(CC) -o $@ $(OBJPAIRLOCALALIGN) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJPAIRLOCALALIGN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
pairash : $(OBJPAIRASH)
- $(CC) -o $@ $(OBJPAIRASH) $(MYCFLAGS) $(LIBS)
-
-rnatest : $(OBJRNATEST)
- $(CC) -o $@ $(OBJRNATEST) $(MYCFLAGS) $(LIBS)
-
-pair2hat3s : $(OBJPAIR2HAT3S)
- $(CC) -o $@ $(OBJPAIR2HAT3S) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJPAIRASH) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
multi2hat3s : $(OBJMULTI2HAT3S)
- $(CC) -o $@ $(OBJMULTI2HAT3S) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJMULTI2HAT3S) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
getlag : $(OBJGETLAG)
- $(CC) -o $@ $(OBJGETLAG) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJGETLAG) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
tditr : $(OBJTDITR)
- $(CC) -o $@ $(OBJTDITR) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dvtditr : $(OBJDVTDITR)
- $(CC) -o $@ $(OBJDVTDITR) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDVTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
mafft-profile : $(OBJGALN)
- $(CC) -o $@ $(OBJGALN) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJGALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
gapfill : $(OBJGAPFILL)
- $(CC) -o $@ $(OBJGAPFILL) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJGAPFILL) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dndfast4 : $(OBJDNDFAST4)
- $(CC) -o $@ $(OBJDNDFAST4) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDNDFAST4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dndfast5 : $(OBJDNDFAST5)
- $(CC) -o $@ $(OBJDNDFAST5) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDNDFAST5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dndfast6 : $(OBJDNDFAST6)
- $(CC) -o $@ $(OBJDNDFAST6) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDNDFAST6) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dndfast7 : $(OBJDNDFAST7)
- $(CC) -o $@ $(OBJDNDFAST7) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDNDFAST7) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dndblast : $(OBJDNDBLAST)
- $(CC) -o $@ $(OBJDNDBLAST) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDNDBLAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dndfast3 : $(OBJDNDFAST3)
- $(CC) -o $@ $(OBJDNDFAST3) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDNDFAST3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
triplet : $(OBJTRIPLET)
- $(CC) -o $@ $(OBJTRIPLET) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJTRIPLET) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
triplet3 : $(OBJTRIPLET3)
- $(CC) -o $@ $(OBJTRIPLET3) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJTRIPLET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
sextet3 : $(OBJSEXTET3)
- $(CC) -o $@ $(OBJSEXTET3) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSEXTET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
sextet4 : $(OBJSEXTET4)
- $(CC) -o $@ $(OBJSEXTET4) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSEXTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
sextet5 : $(OBJSEXTET5)
- $(CC) -o $@ $(OBJSEXTET5) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSEXTET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
mafft-distance : $(OBJDISTANCE)
- $(CC) -o $@ $(OBJDISTANCE) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDISTANCE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
triplet5 : $(OBJTRIPLET5)
- $(CC) -o $@ $(OBJTRIPLET5) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJTRIPLET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
triplet6 : $(OBJTRIPLET6)
- $(CC) -o $@ $(OBJTRIPLET6) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJTRIPLET6) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
octet4 : $(OBJOCTET4)
- $(CC) -o $@ $(OBJOCTET4) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJOCTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
dndpre : $(OBJDNDPRE)
- $(CC) -o $@ $(OBJDNDPRE) $(MYCFLAGS) $(LIBS)
-
-dndpre2 : $(OBJDNDPRESCORE)
- $(CC) -o $@ $(OBJDNDPRESCORE) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJDNDPRE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
score : $(OBJSCORE)
- $(CC) -o $@ $(OBJSCORE) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJSCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
genMtx : $(OBJGENMTX)
- $(CC) -o $@ $(OBJGENMTX) $(MYCFLAGS) $(LIBS)
+ $(CC) -o $@ $(OBJGENMTX) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+mafftash_premafft.pl : mafftash_premafft.tmpl
+ cp mafftash_premafft.tmpl mafftash_premafft.pl
+
+seekquencer_premafft.pl : seekquencer_premafft.tmpl
+ cp seekquencer_premafft.tmpl seekquencer_premafft.pl
gapfill.o : gapfill.c $(HEADER)
$(CC) $(MYCFLAGS) -c gapfill.c
Salignmm.o : Salignmm.c $(HEADER)
$(CC) $(MYCFLAGS) -c Salignmm.c
-Halignmm.o : Halignmm.c $(HEADER)
- $(CC) $(MYCFLAGS) -c Halignmm.c
-
-Ralignmm.o : Ralignmm.c $(HEADER)
- $(CC) $(MYCFLAGS) -c Ralignmm.c
-
-Qalignmm.o : Qalignmm.c $(HEADER)
- $(CC) $(MYCFLAGS) -c Qalignmm.c
+Dalignmm.o : Dalignmm.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c Dalignmm.c
MSalignmm.o : MSalignmm.c $(HEADER)
$(CC) $(MYCFLAGS) -c MSalignmm.c
partSalignmm.o : partSalignmm.c $(HEADER)
$(CC) $(MYCFLAGS) -c partSalignmm.c
-partQalignmm.o : partQalignmm.c $(HEADER)
- $(CC) $(MYCFLAGS) -c partQalignmm.c
-
Lalign11.o : Lalign11.c $(HEADER)
$(CC) $(MYCFLAGS) -c Lalign11.c
genalign11.o : genalign11.c $(HEADER)
$(CC) $(MYCFLAGS) -c genalign11.c
-genGalign11.o : genGalign11.c $(HEADER)
- $(CC) $(MYCFLAGS) -c genGalign11.c
-
suboptalign11.o : suboptalign11.c $(HEADER)
$(CC) $(MYCFLAGS) -c suboptalign11.c
splittbfast2.o : splittbfast2.c $(HEADER) $(FFTHEADER)
$(CC) $(MYCFLAGS) -c splittbfast2.c
+makedirectionlist.o : makedirectionlist.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c makedirectionlist.c
+
disttbfast_dummy.o : disttbfast_dummy.c $(HEADER) $(FFTHEADER)
$(CC) $(MYCFLAGS) -c disttbfast_dummy.c
tbfast.o : tbfast.c $(HEADER) $(FFTHEADER)
$(CC) $(MYCFLAGS) -c tbfast.c
+addsingle.o : addsingle.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c addsingle.c
+
tbfast2.o : tbfast2.c $(HEADER) $(FFTHEADER)
$(CC) $(MYCFLAGS) -c tbfast2.c
f2cl.o : f2cl.c $(HEADER)
$(CC) $(MYCFLAGS) -c f2cl.c
+setdirection.o : setdirection.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c setdirection.c
+
replaceu.o : replaceu.c $(HEADER)
$(CC) $(MYCFLAGS) -c replaceu.c
pairlocalalign.o : pairlocalalign.c $(HEADER) $(FFTHEADER)
$(CC) $(MYCFLAGS) -c pairlocalalign.c
+pairlocalalignmain.o : pairlocalalignmain.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c pairlocalalignmain.c
+
pairash.o : pairash.c $(HEADER) $(FFTHEADER)
$(CC) $(MYCFLAGS) -c pairash.c
-rnatest.o : rnatest.c $(HEADER) $(FFTHEADER)
- $(CC) $(MYCFLAGS) -c rnatest.c
-
multi2hat3s.o : multi2hat3s.c $(HEADER) $(FFTHEADER)
$(CC) $(MYCFLAGS) -c multi2hat3s.c
-pair2hat3s.o : pair2hat3s.c $(HEADER) $(FFTHEADER)
- $(CC) $(MYCFLAGS) -c pair2hat3s.c
-
-dndpre2.o : dndpre2.c $(HEADER)
- $(CC) $(MYCFLAGS) -c dndpre2.c
-
io.o : io.c $(HEADER) $(FFTHEADER)
$(CC) $(MYCFLAGS) -c io.c
$(CC) $(MYCFLAGS) -c score.c
clean :
- rm -f *.o *.a *.exe *~ $(PROGS) $(SCRIPTS)
+ rm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) $(SOS) $(DYLIBS) $(DLLS) *.gcda *.gcno
# rm -f ../binaries/* ../scripts/*
install : all
- mkdir -p $(LIBDIR)
- chmod 755 $(LIBDIR)
- mkdir -p $(BINDIR)
- chmod 755 $(BINDIR)
+ mkdir -p $(DESTDIR)$(LIBDIR)
+ chmod 755 $(DESTDIR)$(LIBDIR)
+ mkdir -p $(DESTDIR)$(BINDIR)
+ chmod 755 $(DESTDIR)$(BINDIR)
chmod 755 $(SCRIPTS)
- $(INSTALL) $(SCRIPTS) $(BINDIR)
- chmod 755 $(PROGS)
- $(INSTALL) $(PROGS) $(LIBDIR)
- $(INSTALL) -m 644 $(MANPAGES) $(LIBDIR)
+ $(INSTALL) $(SCRIPTS) $(DESTDIR)$(BINDIR)
+ chmod 755 $(PROGS) ||: # in MinGW, it's ok if this fails
+ $(INSTALL) -s $(PROGS) $(DESTDIR)$(LIBDIR)
+ $(INSTALL) $(PERLPROGS) $(DESTDIR)$(LIBDIR)
+ $(INSTALL) -m 644 $(MANPAGES) $(DESTDIR)$(LIBDIR)
- ( cd $(BINDIR); \
+ ( cd $(DESTDIR)$(BINDIR); \
rm -f linsi ginsi einsi fftns fftnsi nwns nwnsi xinsi qinsi; \
-rm -f mafft-linsi mafft-ginsi mafft-einsi mafft-fftns mafft-fftnsi mafft-nwns mafft-nwnsi mafft-xinsi mafft-qinsi; \
+rm -f mafft-linsi mafft-ginsi mafft-einsi mafft-fftns mafft-fftnsi mafft-nwns mafft-nwnsi mafft-xinsi mafft-qinsi mafft-randomcore.rb ; \
ln -s mafft linsi; ln -s mafft ginsi; ln -s mafft fftns; \
ln -s mafft fftnsi; ln -s mafft nwns; ln -s mafft nwnsi; \
ln -s mafft einsi; \
rm -f mafft-profile mafft-profile.exe; ln -s $(LIBDIR)/mafft-profile .; \
rm -f mafft-distance mafft-distance.exe; ln -s $(LIBDIR)/mafft-distance . )
- mkdir -p $(MANDIR)
- chmod 755 $(MANDIR)
- $(INSTALL) -m 644 $(MANPAGES) $(MANDIR)
+ mkdir -p $(DESTDIR)$(MANDIR)
+ chmod 755 $(DESTDIR)$(MANDIR)
+ $(INSTALL) -m 644 $(MANPAGES) $(DESTDIR)$(MANDIR)
# remove incorrectly installed manpages by previous versions
# rm -f /usr/local/man/man1/mafft.1 /usr/local/man/man1/mafft-homologs.1
--- /dev/null
+PREFIX = /usr/local
+LIBDIR = $(PREFIX)/libexec/mafft
+BINDIR = $(PREFIX)/bin
+MANDIR = $(PREFIX)/share/man/man1
+
+#MNO_CYGWIN = -mno-cygwin
+
+ENABLE_MULTITHREAD = -Denablemultithread
+# Comment out the above line if your compiler
+# does not support TLS (thread-local storage).
+
+CC = gcc
+#CFLAGS = -O3
+#CFLAGS = -O3 -fPIC
+# add -fPIC when building .so files
+
+#CC = icc
+#CFLAGS = -fast
+# if you have icc, use this.
+
+#CFLAGS = -O0 -fPIC -pedantic -Wall -std=c99 -g -pg -DMALLOC_CHECK_=3
+# Debug flags (for shark, valgrind): position-independent code, no
+# optimization, pedantic C99 warnings, debug symbols.
+# The comment sits on its own lines so the value carries no trailing
+# whitespace, and -fPIC is listed only once.
+CFLAGS = -fPIC -O0 -pedantic -Wall -std=c99 -g -DMALLOC_CHECK_=3
+
+
+MYCFLAGS = $(MNO_CYGWIN) $(ENABLE_MULTITHREAD) $(CFLAGS)
+
+ifdef ENABLE_MULTITHREAD
+LIBS = -lm -lpthread
+else
+LIBS = -lm
+endif
+
+INSTALL = install
+
+PROGS = dvtditr dndfast7 dndblast sextet5 mafft-distance pairlocalalign \
+ pair2hat3s multi2hat3s pairash addsingle \
+ splittbfast disttbfast tbfast mafft-profile f2cl mccaskillwrap contrafoldwrap countlen \
+ seq2regtable regtable2seq score getlag dndpre setcore replaceu restoreu setdirection makedirectionlist version
+SOS = libdisttbfast.so
+DLLS = libdisttbfast.dll
+DYLIBS = libdisttbfast.dylib
+
+PERLPROGS = mafftash_premafft.pl seekquencer_premafft.pl
+SCRIPTS = mafft mafft-homologs.rb
+OBJSETDIRECTION = mtxutl.o io.o setdirection.o defs.o mltaln9.o
+OBJREPLACEU = mtxutl.o io.o replaceu.o defs.o mltaln9.o
+OBJRESTOREU = mtxutl.o io.o restoreu.o defs.o mltaln9.o
+OBJREGTABLE2SEQ = mtxutl.o io.o regtable2seq.o defs.o mltaln9.o
+OBJSEQ2REGTABLE = mtxutl.o io.o seq2regtable.o defs.o
+OBJCOUNTLEN = mtxutl.o io.o countlen.o defs.o
+OBJF2CL = mtxutl.o io.o f2cl.o constants.o defs.o
+OBJMCCASKILLWRAP = mtxutl.o io.o mccaskillwrap.o constants.o defs.o mltaln9.o
+OBJCONTRAFOLDWRAP = mtxutl.o io.o contrafoldwrap.o constants.o defs.o mltaln9.o
+OBJMULTI2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \
+ multi2hat3s.o defs.o fft.o fftFunctions.o
+OBJPAIR2HAT3S = mtxutl.o io.o mltaln9.o tddis.o constants.o \
+ pair2hat3s.o defs.o fft.o fftFunctions.o
+OBJPAIRASH = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \
+ pairash.o defs.o fft.o fftFunctions.o
+OBJPAIRLOCALALIGN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o MSalignmm.o Galign11.o MSalign11.o suboptalign11.o genalign11.o Lalign11.o SAalignmm.o \
+ pairlocalalign.o defs.o fft.o fftFunctions.o
+OBJDUMMY = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ disttbfast_dummy.o dummy.o defs.o fft.o fftFunctions.o
+OBJSPLITFROMALN = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ Lalign11.o splitfromaln.o defs.o fft.o fftFunctions.o
+OBJSPLITTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ Lalign11.o splittbfast.o defs.o fft.o fftFunctions.o
+OBJSPLITTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ Lalign11.o splittbfast2.o defs.o fft.o fftFunctions.o
+OBJSPLITTBFASTP = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ Lalign11.o defs.o fft.o fftFunctions.o
+OBJDISTTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ disttbfast.o defs.o fft.o fftFunctions.o addfunctions.o
+OBJMAKEDIRECTIONLIST = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ Lalign11.o makedirectionlist.o defs.o fft.o fftFunctions.o addfunctions.o
+OBJTBFAST = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \
+ tbfast.o defs.o fft.o fftFunctions.o addfunctions.o
+OBJADDSINGLE = mtxutl.o io.o mltaln9.o tddis.o constants.o MSalignmm.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \
+ addsingle.o defs.o fft.o fftFunctions.o addfunctions.o
+OBJTBFAST2 = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o MSalignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o \
+ tbfast2.o defs.o fft.o fftFunctions.o
+OBJSETCORE = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ setcore.o defs.o fft.o fftFunctions.o
+OBJTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o fftFunctions.o fft.o \
+ tditeration.o tditr.o defs.o SAalignmm.o treeOperation.o
+OBJDVTDITR = mtxutl.o io.o mltaln9.o tddis.o constants.o nj.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o MSalignmm.o fftFunctions.o fft.o \
+ tditeration.o dvtditr.o defs.o SAalignmm.o treeOperation.o addfunctions.o
+OBJGETLAG = mtxutl.o io.o mltaln9.o tddis.o constants.o partSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ Falign.o Falign_localhom.o Galign11.o SAalignmm.o MSalignmm.o \
+ getlag.o defs.o fft.o fftFunctions.o
+OBJGAPFILL = mtxutl.o io.o constants.o gapfill.o defs.o
+# Object lists consumed by the dndfast4/5/6/7 and dndblast link rules.
+# Each list is defined exactly once.
+OBJDNDFAST5 = dndfast5.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
+OBJDNDBLAST = dndblast.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
+OBJDNDFAST7 = dndfast7.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
+OBJDNDFAST6 = dndfast6.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
+OBJDNDFAST4 = dndfast4.o io.o constants.o mtxutl.o mltaln9.o tddis.o defs.o
+OBJSEXTET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o sextet5.o defs.o
+OBJDISTANCE = io.o constants.o mtxutl.o mltaln9.o tddis.o mafft-distance.o defs.o
+OBJTRIPLET6 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet6.o defs.o
+OBJTRIPLET5 = io.o constants.o mtxutl.o mltaln9.o tddis.o triplet5.o defs.o
+OBJOCTET4 = io.o constants.o mtxutl.o mltaln9.o tddis.o octet4.o defs.o
+OBJDNDPRE = dndpre.o io.o constants.o mtxutl.o mltaln9.o defs.o
+OBJGALN = io.o mtxutl.o mltaln9.o tddis.o constants.o partSalignmm.o MSalignmm.o Lalignmm.o rna.o Salignmm.o \
+ SAalignmm.o Galign11.o Falign.o Falign_localhom.o fftFunctions.o fft.o mafft-profile.o defs.o
+OBJSCORE = io.o mtxutl.o mltaln9.o score.o constants.o defs.o
+
+HEADER = mltaln.h mtxutl.h mafft.h
+FFTHEADER = fft.h
+
+MANPAGES = mafft.1 mafft-homologs.1
+
+
+
+
+all : $(PERLPROGS) $(PROGS) $(SCRIPTS)
+ cp $(SCRIPTS) ../scripts
+ chmod 755 ../scripts/*
+ cp $(PERLPROGS) $(PROGS) ../binaries
+ chmod 755 ../binaries/*
+ cp $(MANPAGES) ../binaries
+ @echo done.
+
+sos : $(SOS)
+dylibs : $(DYLIBS)
+dlls : $(DLLS)
+
+univscript: univscript.tmpl Makefile
+ sed "s:_PROGS:$(PROGS):" univscript.tmpl > univscript
+
+mafft: mafft.tmpl mltaln.h
+ sed "s:_LIBDIR:$(LIBDIR):" mafft.tmpl > mafft
+
+mafft-homologs.rb: mafft-homologs.tmpl
+# cp mafft-homologs.tmpl mafft-homologs.rb
+ sed "s:_BINDIR:$(BINDIR):" mafft-homologs.tmpl > mafft-homologs.rb
+
+mltaln.h : functions.h
+ touch mltaln.h
+
+version : version.c mltaln.h
+ $(CC) -o $@ version.c $(MYCFLAGS) $(LDFLAGS)
+
+tbfast : $(OBJTBFAST)
+ $(CC) -o $@ $(OBJTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+addsingle : $(OBJADDSINGLE)
+ $(CC) -o $@ $(OBJADDSINGLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+tbfast2 : $(OBJTBFAST2)
+ $(CC) -o $@ $(OBJTBFAST2) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+disttbfast : $(OBJDISTTBFAST)
+ $(CC) -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+libdisttbfast.so : $(OBJDISTTBFAST)
+ $(CC) -shared -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+libdisttbfast.dylib : $(OBJDISTTBFAST)
+ $(CC) -dynamiclib -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+libdisttbfast.dll : $(OBJDISTTBFAST)
+ $(CC) -shared -o $@ $(OBJDISTTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+makedirectionlist : $(OBJMAKEDIRECTIONLIST)
+ $(CC) -o $@ $(OBJMAKEDIRECTIONLIST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+splittbfast : $(OBJSPLITTBFAST)
+ $(CC) -o $@ $(OBJSPLITTBFAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+splitfromaln : $(OBJSPLITFROMALN)
+ $(CC) -o $@ $(OBJSPLITFROMALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+splittbfast2 : $(OBJSPLITTBFAST2)
+ $(CC) -o $@ $(OBJSPLITTBFAST2) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dummy : $(OBJDUMMY)
+ $(CC) -o $@ $(OBJDUMMY) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+setcore : $(OBJSETCORE)
+ $(CC) -o $@ $(OBJSETCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+countlen : $(OBJCOUNTLEN)
+ $(CC) -o $@ $(OBJCOUNTLEN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+seq2regtable : $(OBJSEQ2REGTABLE)
+ $(CC) -o $@ $(OBJSEQ2REGTABLE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+regtable2seq : $(OBJREGTABLE2SEQ)
+ $(CC) -o $@ $(OBJREGTABLE2SEQ) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+setdirection : $(OBJSETDIRECTION)
+ $(CC) -o $@ $(OBJSETDIRECTION) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+replaceu : $(OBJREPLACEU)
+ $(CC) -o $@ $(OBJREPLACEU) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+restoreu : $(OBJRESTOREU)
+ $(CC) -o $@ $(OBJRESTOREU) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+f2cl : $(OBJF2CL)
+ $(CC) -o $@ $(OBJF2CL) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+mccaskillwrap : $(OBJMCCASKILLWRAP)
+ $(CC) -o $@ $(OBJMCCASKILLWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+contrafoldwrap : $(OBJCONTRAFOLDWRAP)
+ $(CC) -o $@ $(OBJCONTRAFOLDWRAP) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+pairlocalalign : $(OBJPAIRLOCALALIGN)
+ $(CC) -o $@ $(OBJPAIRLOCALALIGN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+pairash : $(OBJPAIRASH)
+ $(CC) -o $@ $(OBJPAIRASH) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+pair2hat3s : $(OBJPAIR2HAT3S)
+ $(CC) -o $@ $(OBJPAIR2HAT3S) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+multi2hat3s : $(OBJMULTI2HAT3S)
+ $(CC) -o $@ $(OBJMULTI2HAT3S) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+getlag : $(OBJGETLAG)
+ $(CC) -o $@ $(OBJGETLAG) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+tditr : $(OBJTDITR)
+ $(CC) -o $@ $(OBJTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dvtditr : $(OBJDVTDITR)
+ $(CC) -o $@ $(OBJDVTDITR) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+mafft-profile : $(OBJGALN)
+ $(CC) -o $@ $(OBJGALN) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+gapfill : $(OBJGAPFILL)
+ $(CC) -o $@ $(OBJGAPFILL) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dndfast4 : $(OBJDNDFAST4)
+ $(CC) -o $@ $(OBJDNDFAST4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dndfast5 : $(OBJDNDFAST5)
+ $(CC) -o $@ $(OBJDNDFAST5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dndfast6 : $(OBJDNDFAST6)
+ $(CC) -o $@ $(OBJDNDFAST6) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dndfast7 : $(OBJDNDFAST7)
+ $(CC) -o $@ $(OBJDNDFAST7) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dndblast : $(OBJDNDBLAST)
+ $(CC) -o $@ $(OBJDNDBLAST) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dndfast3 : $(OBJDNDFAST3)
+ $(CC) -o $@ $(OBJDNDFAST3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+triplet : $(OBJTRIPLET)
+ $(CC) -o $@ $(OBJTRIPLET) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+triplet3 : $(OBJTRIPLET3)
+ $(CC) -o $@ $(OBJTRIPLET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+sextet3 : $(OBJSEXTET3)
+ $(CC) -o $@ $(OBJSEXTET3) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+sextet4 : $(OBJSEXTET4)
+ $(CC) -o $@ $(OBJSEXTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+sextet5 : $(OBJSEXTET5)
+ $(CC) -o $@ $(OBJSEXTET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+mafft-distance : $(OBJDISTANCE)
+ $(CC) -o $@ $(OBJDISTANCE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+triplet5 : $(OBJTRIPLET5)
+ $(CC) -o $@ $(OBJTRIPLET5) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+triplet6 : $(OBJTRIPLET6)
+ $(CC) -o $@ $(OBJTRIPLET6) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+octet4 : $(OBJOCTET4)
+ $(CC) -o $@ $(OBJOCTET4) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+dndpre : $(OBJDNDPRE)
+ $(CC) -o $@ $(OBJDNDPRE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+score : $(OBJSCORE)
+ $(CC) -o $@ $(OBJSCORE) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+genMtx : $(OBJGENMTX)
+ $(CC) -o $@ $(OBJGENMTX) $(MYCFLAGS) $(LDFLAGS) $(LIBS)
+
+mafftash_premafft.pl : mafftash_premafft.tmpl
+ cp mafftash_premafft.tmpl mafftash_premafft.pl
+
+seekquencer_premafft.pl : seekquencer_premafft.tmpl
+ cp seekquencer_premafft.tmpl seekquencer_premafft.pl
+
+gapfill.o : gapfill.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c gapfill.c
+
+mltaln9.o : mltaln9.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c mltaln9.c
+
+tddis.o : tddis.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c tddis.c
+
+constants.o : constants.c miyata.h miyata5.h blosum.c DNA.h JTT.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c constants.c
+
+defs.o : defs.c
+ $(CC) $(MYCFLAGS) -c defs.c
+
+#A+++alignmm.o : SA+++alignmm.c $(HEADER)
+# $(CC) $(MYCFLAGS) -c SA+++alignmm.c -o A+++alignmm.o
+
+Salignmm.o : Salignmm.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c Salignmm.c
+
+MSalignmm.o : MSalignmm.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c MSalignmm.c
+
+partSalignmm.o : partSalignmm.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c partSalignmm.c
+
+Lalign11.o : Lalign11.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c Lalign11.c
+
+genalign11.o : genalign11.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c genalign11.c
+
+suboptalign11.o : suboptalign11.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c suboptalign11.c
+
+Galign11.o : Galign11.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c Galign11.c
+
+MSalign11.o : MSalign11.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c MSalign11.c
+
+SAalignmm.o : SAalignmm.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c SAalignmm.c -o SAalignmm.o
+
+Lalignmm.o : Lalignmm.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c Lalignmm.c
+
+rna.o : rna.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c rna.c
+
+disttbfast.o : disttbfast.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c disttbfast.c
+
+splitfromaln.o : splitfromaln.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c splitfromaln.c
+
+splittbfast.o : splittbfast.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c splittbfast.c
+
+splittbfast2.o : splittbfast2.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c splittbfast2.c
+
+makedirectionlist.o : makedirectionlist.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c makedirectionlist.c
+
+disttbfast_dummy.o : disttbfast_dummy.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c disttbfast_dummy.c
+
+dummy.o : dummy.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c dummy.c
+
+tbfast.o : tbfast.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c tbfast.c
+
+addsingle.o : addsingle.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c addsingle.c
+
+tbfast2.o : tbfast2.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c tbfast2.c
+
+setcore.o : setcore.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c setcore.c
+
+getlag.o : getlag.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c getlag.c
+
+tditr.o : tditr.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c tditr.c
+
+dvtditr.o : dvtditr.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c dvtditr.c
+
+tditeration.o : tditeration.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c tditeration.c
+
+mafft-profile.o : mafft-profile.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c mafft-profile.c
+
+dndfast4.o : dndfast4.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c dndfast4.c
+
+dndfast5.o : dndfast5.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c dndfast5.c
+
+dndfast6.o : dndfast6.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c dndfast6.c
+
+dndfast7.o : dndfast7.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c dndfast7.c
+
+dndblast.o : dndblast.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c dndblast.c
+
+dndfast3.o : dndfast3.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c dndfast3.c
+
+dndpre.o : dndpre.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c dndpre.c
+
+countlen.o : countlen.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c countlen.c
+
+seq2regtable.o : seq2regtable.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c seq2regtable.c
+
+regtable2seq.o : regtable2seq.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c regtable2seq.c
+
+f2cl.o : f2cl.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c f2cl.c
+
+setdirection.o : setdirection.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c setdirection.c
+
+replaceu.o : replaceu.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c replaceu.c
+
+restoreu.o : restoreu.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c restoreu.c
+
+mccaskillwrap.o : mccaskillwrap.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c mccaskillwrap.c
+
+contrafoldwrap.o : contrafoldwrap.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c contrafoldwrap.c
+
+pairlocalalign.o : pairlocalalign.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c pairlocalalign.c
+
+pairash.o : pairash.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c pairash.c
+
+multi2hat3s.o : multi2hat3s.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c multi2hat3s.c
+
+pair2hat3s.o : pair2hat3s.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c pair2hat3s.c
+
+io.o : io.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c io.c
+
+nj.o : nj.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c nj.c
+
+treeOperation.o : treeOperation.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c treeOperation.c
+
+sextet5.o : sextet5.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c sextet5.c
+
+mafft-distance.o : mafft-distance.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c mafft-distance.c
+
+triplet5.o : triplet5.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c triplet5.c
+
+triplet6.o : triplet6.c $(HEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c triplet6.c
+
+fft.o : fft.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c fft.c
+
+fftFunctions.o : fftFunctions.c $(HEADER) $(FFTHEADER)
+ $(CC) $(MYCFLAGS) -c fftFunctions.c
+
+Falign.o : Falign.c $(HEADER) $(FFTHEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c Falign.c
+
+Falign_localhom.o : Falign_localhom.c $(HEADER) $(FFTHEADER) $(MTXHEADER)
+ $(CC) $(MYCFLAGS) -c Falign_localhom.c
+
+mtxutl.o : mtxutl.c
+ $(CC) $(MYCFLAGS) -c mtxutl.c
+
+addfunctions.o : addfunctions.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c addfunctions.c
+
+score.o : score.c $(HEADER)
+ $(CC) $(MYCFLAGS) -c score.c
+
+clean :
+ rm -f *.o *.a *.exe *~ $(PERLPROGS) $(PROGS) $(SCRIPTS) $(SOS) $(DYLIBS) $(DLLS)
+# rm -f ../binaries/* ../scripts/*
+
+install : all
+ mkdir -p $(LIBDIR)
+ chmod 755 $(LIBDIR)
+ mkdir -p $(BINDIR)
+ chmod 755 $(BINDIR)
+ chmod 755 $(SCRIPTS)
+ $(INSTALL) $(SCRIPTS) $(BINDIR)
+ chmod 755 $(PROGS)
+ $(INSTALL) -s $(PROGS) $(LIBDIR)
+ $(INSTALL) $(PERLPROGS) $(LIBDIR)
+ $(INSTALL) -m 644 $(MANPAGES) $(LIBDIR)
+
+ ( cd $(BINDIR); \
+rm -f linsi ginsi einsi fftns fftnsi nwns nwnsi xinsi qinsi; \
+rm -f mafft-linsi mafft-ginsi mafft-einsi mafft-fftns mafft-fftnsi mafft-nwns mafft-nwnsi mafft-xinsi mafft-qinsi; \
+ln -s mafft linsi; ln -s mafft ginsi; ln -s mafft fftns; \
+ln -s mafft fftnsi; ln -s mafft nwns; ln -s mafft nwnsi; \
+ln -s mafft einsi; \
+ln -s mafft mafft-linsi; ln -s mafft mafft-ginsi; ln -s mafft mafft-fftns; \
+ln -s mafft mafft-fftnsi; ln -s mafft mafft-nwns; ln -s mafft mafft-nwnsi; \
+ln -s mafft mafft-einsi; ln -s mafft mafft-xinsi; ln -s mafft mafft-qinsi;\
+rm -f mafft-profile mafft-profile.exe; ln -s $(LIBDIR)/mafft-profile .; \
+rm -f mafft-distance mafft-distance.exe; ln -s $(LIBDIR)/mafft-distance . )
+
+ mkdir -p $(MANDIR)
+ chmod 755 $(MANDIR)
+ $(INSTALL) -m 644 $(MANPAGES) $(MANDIR)
+# remove incorrectly installed manpages by previous versions
+# rm -f /usr/local/man/man1/mafft.1 /usr/local/man/man1/mafft-homologs.1
#define DEBUG 0
-static void match_calc( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
+static void match_calc( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
int j, k, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+// double scarr[26];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
int count = 0;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
if( initialize )
{
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[l][j] )
{
}
}
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( k=0; k<26; k++ )
+ for( k=0; k<nalphabets; k++ )
scarr[l] += n_dis[k][l] * cpmx1[k][i1];
}
for( j=0; j<lgth2; j++ )
for( k=0; cpmxpdn[k][j] > -1; k++ )
match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];
}
+ free( scarr );
}
-static float Atracking( float *lasthorizontalw, float *lastverticalw,
+static double Atracking( double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
+ double **cpmx1, double **cpmx2,
int **ijp, int icyc, int jcyc )
{
int i, j, k, l, iin, jin, ifi, jfi, lgth1, lgth2;
// char gap[] = "-";
char *gap;
- float wm;
+ double wm;
gap = newgapstr;
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
}
-float Aalign( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen )
+double Aalign( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
register int i, j;
int lasti; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
int lgth1, lgth2;
int resultlen;
- float wm = 0.0; /* int ?????? */
- float g;
- float x;
- static TLS float mi, *m;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double x;
+ static TLS double mi, *m;
static TLS int **ijp;
static TLS int mpi, *mp;
- static TLS float *currentw;
- static TLS float *previousw;
- static TLS float *match;
- static TLS float *initverticalw; /* kufuu sureba iranai */
- static TLS float *lastverticalw; /* kufuu sureba iranai */
+ static TLS double *currentw;
+ static TLS double *previousw;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
static TLS char **mseq1;
static TLS char **mseq2;
static TLS char **mseq;
- static TLS float **cpmx1;
- static TLS float **cpmx2;
+ static TLS double **cpmx1;
+ static TLS double **cpmx2;
static TLS int **intwork;
- static TLS float **floatwork;
+ static TLS double **doublework;
static TLS int orlgth1 = 0, orlgth2 = 0;
#if DEBUG
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
}
mseq = AllocateCharMtx( njob, ll1+ll2 );
- cpmx1 = AllocateFloatMtx( 26, ll1+2 );
- cpmx2 = AllocateFloatMtx( 26, ll2+2 );
+ cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
+ cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
fprintf( stderr, "succeeded\n" );
cpmx_calc( seq1, cpmx1, eff1, strlen( seq1[0] ), icyc );
cpmx_calc( seq2, cpmx2, eff2, strlen( seq2[0] ), jcyc );
- match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, floatwork, intwork, 1 );
- match_calc( currentw, cpmx1, cpmx2, 0, lgth2, floatwork, intwork, 1 );
+ match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, doublework, intwork, 1 );
+ match_calc( currentw, cpmx1, cpmx2, 0, lgth2, doublework, intwork, 1 );
if( outgap == 1 )
{
for( i=1; i<lasti; i++ )
{
- floatncpy( previousw, currentw, lgth2+1 );
+ doublencpy( previousw, currentw, lgth2+1 );
previousw[0] = initverticalw[i-1];
- match_calc( currentw, cpmx1, cpmx2, i, lgth2, floatwork, intwork, 0 );
+ match_calc( currentw, cpmx1, cpmx2, i, lgth2, doublework, intwork, 0 );
currentw[0] = initverticalw[i];
mi = previousw[0] + penalty * 0.5; mpi = 0;
#define OUTGAP0TRY 0
#define DEBUG 0
#define XXXXXXX 0
-#define USE_PENALTY_EX 0
+#define USE_PENALTY_EX 1
#define FASTMATCHCALC 1
+#define SLOW 0
-static TLS float **impmtx = NULL;
+static TLS double **impmtx = NULL;
static TLS int impalloclen = 0;
-float imp_match_out_sc( int i1, int j1 )
+double imp_match_out_sc( int i1, int j1 )
{
// fprintf( stderr, "imp+match = %f\n", impmtx[i1][j1] * fastathreshold );
// fprintf( stderr, "val = %f\n", impmtx[i1][j1] );
return( impmtx[i1][j1] );
}
-static void imp_match_out_vead_gapmap( float *imp, int i1, int lgth2, int *gapmap2 )
+
+#if 0
+static void imp_match_out_vead_gapmap( double *imp, int i1, int lgth2, int *gapmap2 )
{
#if FASTMATCHCALC
- float *pt = impmtx[i1];
+ double *pt = impmtx[i1];
int *gapmappt = gapmap2;
while( lgth2-- )
*imp++ += pt[*gapmappt++];
#else
int j;
- float *pt = impmtx[i1];
+ double *pt = impmtx[i1];
for( j=0; j<lgth2; j++ )
*imp++ += pt[gapmap2[j]];
#endif
}
-static void imp_match_out_vead_tate_gapmap( float *imp, int j1, int lgth1, int *gapmap1 )
+static void imp_match_out_vead_tate_gapmap( double *imp, int j1, int lgth1, int *gapmap1 )
{
#if FASTMATCHCALC
int *gapmappt = gapmap1;
*imp++ += impmtx[gapmap1[i]][j1];
#endif
}
+#endif
-static void imp_match_out_vead( float *imp, int i1, int lgth2 )
+static void imp_match_out_vead( double *imp, int i1, int lgth2 )
{
#if FASTMATCHCALC
- float *pt = impmtx[i1];
+ double *pt = impmtx[i1];
while( lgth2-- )
*imp++ += *pt++;
#else
int j;
- float *pt = impmtx[i1];
+ double *pt = impmtx[i1];
for( j=0; j<lgth2; j++ )
*imp++ += pt[j];
#endif
}
-static void imp_match_out_vead_tate( float *imp, int j1, int lgth1 )
+static void imp_match_out_vead_tate( double *imp, int j1, int lgth1 )
{
int i;
for( i=0; i<lgth1; i++ )
foldrna( nseq1, nseq2, seq1, seq2, eff1, eff2, grouprna1, grouprna2, impmtx, gapmap1, gapmap2, pair );
}
-
-void imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, int forscore )
+void imp_match_init_strict( double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 )
{
- int i, j, k1, k2, tmpint, start1, start2, end1, end2;
- float effij;
- float effij_kozo;
- double effijx;
- char *pt, *pt1, *pt2;
- static TLS char *nocount1 = NULL;
- static TLS char *nocount2 = NULL;
- LocalHom *tmpptr;
+// int i, j, k1, k2, tmpint, start1, start2, end1, end2;
+// double effij;
+// double effij_kozo;
+// double effijx;
+// char *pt, *pt1, *pt2;
+// static TLS char *nocount1 = NULL;
+// static TLS char *nocount2 = NULL;
+// LocalHom *tmpptr;
if( seq1 == NULL )
{
if( impmtx ) FreeFloatMtx( impmtx );
impmtx = NULL;
- if( nocount1 ) free( nocount1 );
- nocount1 = NULL;
- if( nocount2 ) free( nocount2 );
- nocount2 = NULL;
+// if( nocount1 ) free( nocount1 );
+// nocount1 = NULL;
+// if( nocount2 ) free( nocount2 );
+// nocount2 = NULL;
return;
}
if( impalloclen < lgth1 + 2 || impalloclen < lgth2 + 2 )
{
if( impmtx ) FreeFloatMtx( impmtx );
- if( nocount1 ) free( nocount1 );
- if( nocount2 ) free( nocount2 );
+// if( nocount1 ) free( nocount1 );
+// if( nocount2 ) free( nocount2 );
impalloclen = MAX( lgth1, lgth2 ) + 2;
impmtx = AllocateFloatMtx( impalloclen, impalloclen );
- nocount1 = AllocateCharVec( impalloclen );
- nocount2 = AllocateCharVec( impalloclen );
- }
-
- for( i=0; i<lgth1; i++ )
- {
- for( j=0; j<clus1; j++ )
- if( seq1[j][i] == '-' ) break;
- if( j != clus1 ) nocount1[i] = 1;
- else nocount1[i] = 0;
- }
- for( i=0; i<lgth2; i++ )
- {
- for( j=0; j<clus2; j++ )
- if( seq2[j][i] == '-' ) break;
- if( j != clus2 ) nocount2[i] = 1;
- else nocount2[i] = 0;
+// nocount1 = AllocateCharVec( impalloclen );
+// nocount2 = AllocateCharVec( impalloclen );
}
-#if 0
-fprintf( stderr, "nocount2 =\n" );
-for( i = 0; i<impalloclen; i++ )
-{
- fprintf( stderr, "nocount2[%d] = %d (%c)\n", i, nocount2[i], seq2[0][i] );
+ fillimp( impmtx, imp, clus1, clus2, lgth1, lgth2, seq1, seq2, eff1, eff2, eff1_kozo, eff2_kozo, localhom, swaplist, forscore, orinum1, orinum2 );
}
-#endif
-
-#if 0
- fprintf( stderr, "eff1 in _init_strict = \n" );
- for( i=0; i<clus1; i++ )
- fprintf( stderr, "eff1[] = %f\n", eff1[i] );
- for( i=0; i<clus2; i++ )
- fprintf( stderr, "eff2[] = %f\n", eff2[i] );
-#endif
-
- for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
- impmtx[i][j] = 0.0;
- effijx = fastathreshold;
- for( i=0; i<clus1; i++ )
+static void match_calc_del( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, int n2, char **seq2, double *eff2, int i1, int lgth2, int mid, int nmask, int *mask1, int *mask2 )
+{
+// osoi!
+ int i, j, k, m;
+ int c1, c2;
+// fprintf( stderr, "\nmatch_calc_dynamicmtx... %d", i1 );
+// fprintf( stderr, "\nseq1[0]=%s\n", seq1[0] );
+// fprintf( stderr, "\nseq2[0]=%s\n", seq2[0] );
+// for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+// {
+// if( flip ) reporterr( "in match_calc_slow, which[%d][%d] = %d\n", j, i, which[j][i] );
+// else reporterr( "in match_calc_slow, which[%d][%d] = %d\n", i, j, which[i][j] );
+// }
+ for( k=0; k<lgth2; k++ )
{
- for( j=0; j<clus2; j++ )
- {
- effij = (float)( eff1[i] * eff2[j] * effijx );
- effij_kozo = (float)( eff1_kozo[i] * eff2_kozo[j] * effijx );
- tmpptr = localhom[i][j];
- while( tmpptr )
- {
-// fprintf( stderr, "start1 = %d\n", tmpptr->start1 );
-// fprintf( stderr, "end1 = %d\n", tmpptr->end1 );
-// fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] );
-// fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] );
- pt = seq1[i];
- tmpint = -1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == tmpptr->start1 ) break;
- }
- start1 = pt - seq1[i] - 1;
-
- if( tmpptr->start1 == tmpptr->end1 ) end1 = start1;
- else
- {
-#if MACHIGAI
- while( *pt != 0 )
- {
-// fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] );
- if( tmpint == tmpptr->end1 ) break;
- if( *pt++ != '-' ) tmpint++;
- }
- end1 = pt - seq1[i] - 0;
-#else
- while( *pt != 0 )
- {
-// fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] );
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == tmpptr->end1 ) break;
- }
- end1 = pt - seq1[i] - 1;
-#endif
- }
-
- pt = seq2[j];
- tmpint = -1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == tmpptr->start2 ) break;
- }
- start2 = pt - seq2[j] - 1;
- if( tmpptr->start2 == tmpptr->end2 ) end2 = start2;
- else
- {
-#if MACHIGAI
- while( *pt != 0 )
- {
- if( tmpint == tmpptr->end2 ) break;
- if( *pt++ != '-' ) tmpint++;
- }
- end2 = pt - seq2[j] - 0;
-#else
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == tmpptr->end2 ) break;
- }
- end2 = pt - seq2[j] - 1;
-#endif
- }
-// fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] );
-// fprintf( stderr, "step 0\n" );
- if( end1 - start1 != end2 - start2 )
- {
-// fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
- }
-
-#if 1
- k1 = start1; k2 = start2;
- pt1 = seq1[i] + k1;
- pt2 = seq2[j] + k2;
- while( *pt1 && *pt2 )
- {
- if( *pt1 != '-' && *pt2 != '-' )
- {
-// ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£
-// impmtx[k1][k2] += tmpptr->wimportance * fastathreshold;
-// impmtx[k1][k2] += tmpptr->importance * effij;
-// impmtx[k1][k2] += tmpptr->fimportance * effij;
- if( tmpptr->korh == 'k' )
- impmtx[k1][k2] += tmpptr->fimportance * effij_kozo;
- else
- impmtx[k1][k2] += tmpptr->fimportance * effij;
-
-// fprintf( stderr, "#### impmtx[k1][k2] = %f, tmpptr->fimportance=%f, effij=%f\n", impmtx[k1][k2], tmpptr->fimportance, effij );
-// fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
-// fprintf( stderr, "%d (%c) - %d (%c) - %f\n", k1, *pt1, k2, *pt2, tmpptr->fimportance * effij );
- k1++; k2++;
- pt1++; pt2++;
- }
- else if( *pt1 != '-' && *pt2 == '-' )
- {
-// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
- k2++; pt2++;
- }
- else if( *pt1 == '-' && *pt2 != '-' )
- {
-// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
- k1++; pt1++;
- }
- else if( *pt1 == '-' && *pt2 == '-' )
- {
-// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
- k1++; pt1++;
- k2++; pt2++;
- }
- if( k1 > end1 || k2 > end2 ) break;
- }
-#else
- while( k1 <= end1 && k2 <= end2 )
- {
- fprintf( stderr, "k1,k2=%d,%d - ", k1, k2 );
- if( !nocount1[k1] && !nocount2[k2] )
- {
- impmtx[k1][k2] += tmpptr->wimportance * eff1[i] * eff2[j] * fastathreshold;
- fprintf( stderr, "marked\n" );
- }
- else
- fprintf( stderr, "no count\n" );
- k1++; k2++;
- }
-#endif
- tmpptr = tmpptr->next;
- }
- }
- }
-
-#if 0
- if( clus1 == 1 && clus2 == 1 )
- {
- fprintf( stderr, "writing impmtx\n" );
- fprintf( stderr, "\n" );
- fprintf( stderr, "seq1[0] = %s\n", seq1[0] );
- fprintf( stderr, "seq2[0] = %s\n", seq2[0] );
- fprintf( stderr, "impmtx = \n" );
- for( k2=0; k2<lgth2; k2++ )
- fprintf( stderr, "%6.3f ", (double)k2 );
- fprintf( stderr, "\n" );
- for( k1=0; k1<lgth1; k1++ )
+ for( m=0; m<nmask; m++ )
{
- fprintf( stderr, "%d ", k1 );
- for( k2=0; k2<30; k2++ )
- fprintf( stderr, "%2.1f ", impmtx[k1][k2] );
- fprintf( stderr, "\n" );
+ i = mask1[m];
+ j = mask2[m];
+// reporterr( "Deleting %d-%d (c=%d)\n", i, j, mid );
+// if( k==0 ) fprintf( stderr, "pairoffset[%d][%d] = %f\n", i, j, po );
+ c1 = amino_n[(unsigned int)seq1[i][i1]];
+ c2 = amino_n[(unsigned int)seq2[j][k]];
+// reporterr( "k=%d, c1=%d, c2=%d, seq1[i][i1]=%c, seq2[%d][%d]=%c\n", k, c1, c2, seq1[i][i1], j, k, seq2[j][k] );
+ if( seq1[i][i1] == '-' || seq2[j][k] == '-' ) continue;
+ if( c1 < 0 || c2 < 0 ) continue;
+// fprintf( stderr, "c1=%d, c2=%d\n", c1, c2 );
+// fprintf( stderr, "match[k] = %f -> ", match[k], mid );
+ match[k] -= matrices[mid][c1][c2] * eff1[i] * eff2[j];
+// fprintf( stderr, "match[k] = %f (mid=%d)\n", match[k], mid );
}
-// exit( 1 );
}
-#endif
+// fprintf( stderr, "done\n" );
+ return;
}
-#if 0
-void imp_match_init( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom )
-{
- int dif, i, j, k1, k2, tmpint, start1, start2, end1, end2;
- static TLS int impalloclen = 0;
- char *pt;
- int allgap;
- static TLS char *nocount1 = NULL;
- static TLS char *nocount2 = NULL;
-
- if( impalloclen < lgth1 + 2 || impalloclen < lgth2 + 2 )
- {
- if( impmtx ) FreeFloatMtx( impmtx );
- if( nocount1 ) free( nocount1 );
- if( nocount2 ) free( nocount2 );
- impalloclen = MAX( lgth1, lgth2 ) + 2;
- impmtx = AllocateFloatMtx( impalloclen, impalloclen );
- nocount1 = AllocateCharVec( impalloclen );
- nocount2 = AllocateCharVec( impalloclen );
- }
- for( i=0; i<lgth1; i++ )
- {
- for( j=0; j<clus1; j++ )
- if( seq1[j][i] == '-' ) break;
- if( j != clus1 ) nocount1[i] = 1;
- else nocount1[i] = 0;
- }
- for( i=0; i<lgth2; i++ )
+#if SLOW
+static void match_calc_slow( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, int n2, char **seq2, double *eff2, int i1, int lgth2, double **doublework, int **intwork, int initialize, int flip )
+{
+// osoi!
+ int i, j, k;
+ int c1, c2;
+ int mid;
+// fprintf( stderr, "\nmatch_calc_dynamicmtx... %d", i1 );
+// fprintf( stderr, "\nseq1[0]=%s\n", seq1[0] );
+// fprintf( stderr, "\nseq2[0]=%s\n", seq2[0] );
+// for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+// {
+// if( flip ) reporterr( "in match_calc_slow, which[%d][%d] = %d\n", j, i, which[j][i] );
+// else reporterr( "in match_calc_slow, which[%d][%d] = %d\n", i, j, which[i][j] );
+// }
+ for( k=0; k<lgth2; k++ )
{
- for( j=0; j<clus2; j++ )
- if( seq2[j][i] == '-' ) break;
- if( j != clus2 ) nocount2[i] = 1;
- else nocount2[i] = 0;
+ match[k] = 0.0;
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ {
+ if( flip ) mid = which[j][i];
+ else mid = which[i][j];
+// if( k==0 ) fprintf( stderr, "pairoffset[%d][%d] = %f\n", i, j, po );
+ c1 = amino_n[(unsigned int)seq1[i][i1]];
+ c2 = amino_n[(unsigned int)seq2[j][k]];
+ if( seq1[i][i1] == '-' || seq2[j][k] == '-' ) continue;
+ if( c1 < 0 || c2 < 0 ) continue;
+// fprintf( stderr, "c1=%d, c2=%d\n", c1, c2 );
+ if( flip )
+ match[k] += matrices[mid][c1][c2] * eff1[i] * eff2[j];
+ else
+ match[k] += matrices[mid][c1][c2] * eff1[i] * eff2[j];
+// fprintf( stderr, "match[k] = %f (which=%d)\n", match[k], mid );
+ }
}
+// fprintf( stderr, "done\n" );
+ return;
+}
+#endif
-#if 0
-fprintf( stderr, "nocount2 =\n" );
-for( i = 0; i<impalloclen; i++ )
+static void fillzero( double *s, int l )
{
- fprintf( stderr, "nocount2[%d] = %d (%c)\n", i, nocount2[i], seq2[0][i] );
+ while( l-- ) *s++ = 0.0;
}
-#endif
- for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
- impmtx[i][j] = 0;
- for( i=0; i<clus1; i++ )
+static void match_calc_add( double **scoreingmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
+{
+#if FASTMATCHCALC
+// fprintf( stderr, "\nmatch_calc... %d", i1 );
+ int j, l;
+// double scarr[26];
+ double **cpmxpd = doublework;
+ int **cpmxpdn = intwork;
+ double *matchpt, *cpmxpdpt, **cpmxpdptpt;
+ int *cpmxpdnpt, **cpmxpdnptpt;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
+ if( initialize )
{
- fprintf( stderr, "i = %d, seq1 = %s\n", i, seq1[i] );
- for( j=0; j<clus2; j++ )
+ int count = 0;
+ for( j=0; j<lgth2; j++ )
{
- fprintf( stderr, "start1 = %d\n", localhom[i][j]->start1 );
- fprintf( stderr, "end1 = %d\n", localhom[i][j]->end1 );
- fprintf( stderr, "j = %d, seq2 = %s\n", j, seq2[j] );
- pt = seq1[i];
- tmpint = -1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == localhom[i][j]->start1 ) break;
- }
- start1 = pt - seq1[i] - 1;
-
- while( *pt != 0 )
- {
-// fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, localhom[i][j].end1, pt-seq1[i] );
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == localhom[i][j]->end1 ) break;
- }
- end1 = pt - seq1[i] - 1;
-
- pt = seq2[j];
- tmpint = -1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == localhom[i][j]->start2 ) break;
- }
- start2 = pt - seq2[j] - 1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == localhom[i][j]->end2 ) break;
- }
- end2 = pt - seq2[j] - 1;
-// fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
- k1 = start1;
- k2 = start2;
- fprintf( stderr, "step 0\n" );
- while( k1 <= end1 && k2 <= end2 )
- {
-#if 0
- if( !nocount1[k1] && !nocount2[k2] )
- impmtx[k1][k2] += localhom[i][j].wimportance * eff1[i] * eff2[j];
- k1++; k2++;
-#else
- if( !nocount1[k1] && !nocount2[k2] )
- impmtx[k1][k2] += localhom[i][j]->wimportance * eff1[i] * eff2[j];
- k1++; k2++;
-#endif
- }
-
- dif = ( end1 - start1 ) - ( end2 - start2 );
- fprintf( stderr, "dif = %d\n", dif );
- if( dif > 0 )
+ count = 0;
+ for( l=0; l<nalphabets; l++ )
{
- do
+ if( cpmx2[l][j] )
{
- fprintf( stderr, "dif = %d\n", dif );
- k1 = start1;
- k2 = start2 - dif;
- while( k1 <= end1 && k2 <= end2 )
- {
- if( 0 <= k2 && start2 <= k2 && !nocount1[k1] && !nocount2[k2] )
- impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j];
- k1++; k2++;
- }
+ cpmxpd[j][count] = cpmx2[l][j];
+ cpmxpdn[j][count] = l;
+ count++;
}
- while( dif-- );
}
- else
+ cpmxpdn[j][count] = -1;
+ }
+ }
+
+ {
+ for( l=0; l<nalphabets; l++ )
+ {
+ scarr[l] = 0.0;
+ for( j=0; j<nalphabets; j++ )
+// scarr[l] += n_dis[j][l] * cpmx1[j][i1];
+// scarr[l] += n_dis_consweight_multi[j][l] * cpmx1[j][i1];
+ scarr[l] += scoreingmtx[j][l] * cpmx1[j][i1];
+ }
+ matchpt = match;
+ cpmxpdnptpt = cpmxpdn;
+ cpmxpdptpt = cpmxpd;
+ while( lgth2-- )
+ {
+// *matchpt = 0.0;
+ cpmxpdnpt = *cpmxpdnptpt++;
+ cpmxpdpt = *cpmxpdptpt++;
+ while( *cpmxpdnpt>-1 )
+ *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++;
+ matchpt++;
+ }
+ }
+ free( scarr );
+// fprintf( stderr, "done\n" );
+#else
+ int j, k, l;
+// double scarr[26];
+ double **cpmxpd = doublework;
+ int **cpmxpdn = intwork;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
+// simple
+ if( initialize )
+ {
+ int count = 0;
+ for( j=0; j<lgth2; j++ )
+ {
+ count = 0;
+ for( l=0; l<nalphabets; l++ )
{
- do
+ if( cpmx2[l][j] )
{
- k1 = start1 + dif;
- k2 = start2;
- while( k1 <= end1 )
- {
- if( k1 >= 0 && k1 >= start1 && !nocount1[k1] && !nocount2[k2] )
- impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j];
- k1++; k2++;
- }
+ cpmxpd[count][j] = cpmx2[l][j];
+ cpmxpdn[count][j] = l;
+ count++;
}
- while( dif++ );
}
+ cpmxpdn[count][j] = -1;
}
}
-#if 0
- fprintf( stderr, "impmtx = \n" );
- for( k2=0; k2<lgth2; k2++ )
- fprintf( stderr, "%6.3f ", (double)k2 );
- fprintf( stderr, "\n" );
- for( k1=0; k1<lgth1; k1++ )
+ for( l=0; l<nalphabets; l++ )
{
- fprintf( stderr, "%d", k1 );
- for( k2=0; k2<lgth2; k2++ )
- fprintf( stderr, "%6.3f ", impmtx[k1][k2] );
- fprintf( stderr, "\n" );
+ scarr[l] = 0.0;
+ for( k=0; k<nalphabets; k++ )
+// scarr[l] += n_dis[k][l] * cpmx1[k][i1];
+// scarr[l] += n_dis_consweight_multi[k][l] * cpmx1[k][i1];
+ scarr[l] += scoreingmtx[k][l] * cpmx1[k][i1];
}
+ for( j=0; j<lgth2; j++ )
+ {
+ match[j] = 0.0;
+ for( k=0; cpmxpdn[k][j]>-1; k++ )
+ match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];
+ }
+ free( scarr );
#endif
}
-#endif
-
-static void match_calc( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
+static void match_calc( double **n_dynamicmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
#if FASTMATCHCALC
+// fprintf( stderr, "\nmatch_calc... %d", i1 );
int j, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+// double scarr[26];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
- float *matchpt, *cpmxpdpt, **cpmxpdptpt;
+ double *matchpt, *cpmxpdpt, **cpmxpdptpt;
int *cpmxpdnpt, **cpmxpdnptpt;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
if( initialize )
{
int count = 0;
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[l][j] )
{
}
{
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( j=0; j<26; j++ )
+ for( j=0; j<nalphabets; j++ )
// scarr[l] += n_dis[j][l] * cpmx1[j][i1];
- scarr[l] += n_dis_consweight_multi[j][l] * cpmx1[j][i1];
+// scarr[l] += n_dis_consweight_multi[j][l] * cpmx1[j][i1];
+ scarr[l] += n_dynamicmtx[j][l] * cpmx1[j][i1];
}
matchpt = match;
cpmxpdnptpt = cpmxpdn;
matchpt++;
}
}
+ free( scarr );
+// fprintf( stderr, "done\n" );
#else
int j, k, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+// double scarr[26];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
// simple
if( initialize )
{
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[l][j] )
{
cpmxpdn[count][j] = -1;
}
}
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( k=0; k<26; k++ )
- scarr[l] += n_dis_consweight_multi[k][l] * cpmx1[k][i1];
+ for( k=0; k<nalphabets; k++ )
// scarr[l] += n_dis[k][l] * cpmx1[k][i1];
+// scarr[l] += n_dis_consweight_multi[k][l] * cpmx1[k][i1];
+ scarr[l] += n_dynamicmtx[k][l] * cpmx1[k][i1];
}
for( j=0; j<lgth2; j++ )
{
for( k=0; cpmxpdn[k][j]>-1; k++ )
match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];
}
+ free( scarr );
#endif
}
-static void Atracking_localhom( float *impwmpt, float *lasthorizontalw, float *lastverticalw,
+static void Atracking_localhom( double *impwmpt, double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
- int **ijp, int icyc, int jcyc )
+ int **ijp, int icyc, int jcyc,
+ int *warpis, int *warpjs, int warpbase,
+ int *ngap1, int *ngap2, int reuseprofiles )
{
- int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k;
- float wm;
+ int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
+ double wm;
char *gaptable1, *gt1bk;
char *gaptable2, *gt2bk;
lgth1 = strlen( seq1[0] );
*gaptable1 = 0;
gaptable2 = gt2bk + lgth1+lgth2;
*gaptable2 = 0;
+ *ngap1 = *ngap2 = 0;
iin = lgth1; jin = lgth2;
+ limk = lgth1+lgth2 + 1;
*impwmpt = 0.0;
- for( k=0; k<=lgth1+lgth2; k++ )
+ for( k=0; k<limk; k++ )
{
- if( ijp[iin][jin] < 0 )
+ if( ijp[iin][jin] >= warpbase )
+ {
+ ifi = warpis[ijp[iin][jin]-warpbase];
+ jfi = warpjs[ijp[iin][jin]-warpbase];
+ }
+ else if( ijp[iin][jin] < 0 )
{
ifi = iin-1; jfi = jin+ijp[iin][jin];
}
{
ifi = iin-1; jfi = jin-1;
}
- l = iin - ifi;
- while( --l )
+ if( ifi == -warpbase && jfi == -warpbase )
{
- *--gaptable1 = 'o';
- *--gaptable2 = '-';
- k++;
+ l = iin;
+ while( --l >= 0 )
+ {
+ *--gaptable1 = 'o';
+ *--gaptable2 = '-';
+ k++;
+ *ngap2 = 1;
+ }
+ l= jin;
+ while( --l >= 0 )
+ {
+ *--gaptable1 = '-';
+ *--gaptable2 = 'o';
+ k++;
+ *ngap1 = 1;
+ }
+ break;
}
- l= jin - jfi;
- while( --l )
+ else
{
- *--gaptable1 = '-';
- *--gaptable2 = 'o';
- k++;
+ l = iin - ifi;
+ while( --l )
+ {
+ *--gaptable1 = 'o';
+ *--gaptable2 = '-';
+ k++;
+ *ngap2 = 1;
+ }
+ l= jin - jfi;
+ while( --l )
+ {
+ *--gaptable1 = '-';
+ *--gaptable2 = 'o';
+ k++;
+ *ngap1 = 1;
+ }
}
if( iin == lgth1 || jin == lgth2 )
;
else
{
- *impwmpt += imp_match_out_sc( iin, jin );
+ *impwmpt += (double)imp_match_out_sc( iin, jin );
// fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] );
}
iin = ifi; jin = jfi;
}
- for( i=0; i<icyc; i++ ) gapireru( mseq1[i], seq1[i], gaptable1 );
- for( j=0; j<jcyc; j++ ) gapireru( mseq2[j], seq2[j], gaptable2 );
+ if( *ngap1 || !reuseprofiles )
+ for( i=0; i<icyc; i++ ) gapireru( mseq1[i], seq1[i], gaptable1 );
+
+ if( *ngap2 || !reuseprofiles )
+ for( j=0; j<jcyc; j++ ) gapireru( mseq2[j], seq2[j], gaptable2 );
free( gt1bk );
free( gt2bk );
}
-static void Atracking_localhom_gapmap( float *impwmpt, float *lasthorizontalw, float *lastverticalw,
+
+static double Atracking( double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
int **ijp, int icyc, int jcyc,
- int *gapmap1, int *gapmap2 )
+ int tailgp,
+ int *warpis, int *warpjs, int warpbase,
+ int *ngap1, int *ngap2, int reuseprofiles )
{
- int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k;
- float wm;
+ int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
+ double wm;
char *gaptable1, *gt1bk;
char *gaptable2, *gt2bk;
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
+
gt1bk = AllocateCharVec( lgth1+lgth2+1 );
gt2bk = AllocateCharVec( lgth1+lgth2+1 );
}
#endif
- if( outgap == 1 )
+ if( tailgp == 1 )
;
else
{
gaptable2 = gt2bk + lgth1+lgth2;
*gaptable2 = 0;
+ *ngap1 = *ngap2 = 0;
iin = lgth1; jin = lgth2;
- *impwmpt = 0.0;
- for( k=0; k<=lgth1+lgth2; k++ )
+ limk = lgth1+lgth2 + 1;
+ for( k=0; k<limk; k++ )
{
- if( ijp[iin][jin] < 0 )
+ if( ijp[iin][jin] >= warpbase )
+ {
+ ifi = warpis[ijp[iin][jin]-warpbase];
+ jfi = warpjs[ijp[iin][jin]-warpbase];
+ }
+ else if( ijp[iin][jin] < 0 )
{
ifi = iin-1; jfi = jin+ijp[iin][jin];
}
{
ifi = iin-1; jfi = jin-1;
}
- l = iin - ifi;
- while( --l )
- {
- *--gaptable1 = 'o';
- *--gaptable2 = '-';
- k++;
- }
- l= jin - jfi;
- while( --l )
+
+ if( ifi == -warpbase && jfi == -warpbase )
{
- *--gaptable1 = '-';
- *--gaptable2 = 'o';
- k++;
+ l = iin;
+ while( --l >= 0 )
+ {
+ *--gaptable1 = 'o';
+ *--gaptable2 = '-';
+ k++;
+ *ngap2=1;
+ }
+ l= jin;
+ while( --l >= 0 )
+ {
+ *--gaptable1 = '-';
+ *--gaptable2 = 'o';
+ k++;
+ *ngap1=1;
+ }
+ break;
}
- if( iin == lgth1 || jin == lgth2 )
- ;
else
{
- *impwmpt += imp_match_out_sc( gapmap1[iin], gapmap2[jin] );
-
-// fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] );
+ l = iin - ifi;
+ while( --l )
+ {
+ *--gaptable1 = 'o';
+ *--gaptable2 = '-';
+ k++;
+ *ngap2=1;
+ }
+ l= jin - jfi;
+ while( --l )
+ {
+ *--gaptable1 = '-';
+ *--gaptable2 = 'o';
+ k++;
+ *ngap1=1;
+ }
}
if( iin <= 0 || jin <= 0 ) break;
- *--gaptable1 = '-';
- *--gaptable2 = '-';
+ *--gaptable1 = 'o';
+ *--gaptable2 = 'o';
k++;
iin = ifi; jin = jfi;
}
- for( i=0; i<icyc; i++ ) gapireru( mseq1[i], seq1[i], gaptable1 );
- for( j=0; j<jcyc; j++ ) gapireru( mseq2[j], seq2[j], gaptable2 );
+
+// reporterr( "gaptable1=%s\n", gaptable1 );
+// reporterr( "gaptable2=%s\n", gaptable2 );
+
+
+ if( *ngap1 || !reuseprofiles )
+ for( i=0; i<icyc; i++ ) gapireru( mseq1[i], seq1[i], gaptable1 );
+
+ if( *ngap2 || !reuseprofiles )
+ for( j=0; j<jcyc; j++ ) gapireru( mseq2[j], seq2[j], gaptable2 );
free( gt1bk );
free( gt2bk );
+
+ return( 0.0 );
}
-static float Atracking( float *lasthorizontalw, float *lastverticalw,
- char **seq1, char **seq2,
- char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
- int **ijp, int icyc, int jcyc,
- int tailgp )
+
+double A__align( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp, int firstmem, int calledbyfulltreebase )
+/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
- int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k;
- float wm;
- char *gaptable1, *gt1bk;
- char *gaptable2, *gt2bk;
- lgth1 = strlen( seq1[0] );
- lgth2 = strlen( seq2[0] );
+ int reuseprofiles;
+ static TLS int previousfirstlen; // 2016/Feb/1 // MEMBER NO CHECK GA HITSUYOU!!!!
+ static TLS int previousicyc; // 2016/Feb/1 // MEMBER NO CHECK GA HITSUYOU!!!!
+ static TLS int previousfirstmem;
+ static TLS int previouscall;
+ int ngap1, ngap2;
+// int k;
+ register int i, j;
+ int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
+ int lgth1, lgth2;
+ int resultlen;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+// double fpenalty = (double)penalty;
+#if USE_PENALTY_EX
+ double fpenalty_ex = (double)penalty_ex;
+#endif
+#if 1
+ double *wtmp;
+ int *ijppt;
+ double *mjpt, *prept, *curpt;
+ int *mpjpt;
+#endif
+ static TLS double mi, *m;
+ static TLS int **ijp;
+ static TLS int mpi, *mp;
+ static TLS double *w1, *w2;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
+ static TLS char **mseq1;
+ static TLS char **mseq2;
+ static TLS char **mseq;
+ static TLS double *ogcp1, *ogcp1o;
+ static TLS double *ogcp2;
+ static TLS double *fgcp1, *fgcp1o;
+ static TLS double *fgcp2;
+ static TLS double **cpmx1;
+ static TLS double **cpmx2;
+ static TLS int **intwork;
+ static TLS double **doublework;
+ static TLS int orlgth1 = 0, orlgth2 = 0;
+ static TLS double *gapfreq1;
+ static TLS double *gapfreq2;
+ double fpenalty = (double)penalty;
+ double fpenalty_shift = (double)penalty_shift;
+ double *fgcp2pt;
+ double *ogcp2pt;
+ double fgcp1va;
+ double ogcp1va;
+ double *gf2pt;
+ double *gf2ptpre;
+ double gf1va;
+ double gf1vapre;
+ double headgapfreq1;
+ double headgapfreq2;
+
+ int *warpis = NULL;
+ int *warpjs = NULL;
+ int *warpi = NULL;
+ int *warpj = NULL;
+ int *prevwarpi = NULL;
+ int *prevwarpj = NULL;
+ double *wmrecords = NULL;
+ double *prevwmrecords = NULL;
+ int warpn = 0;
+ int warpbase;
+ double curm = 0.0;
+ double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+ int *warpipt, *warpjpt;
+
+// for( i=0; i<icyc; i++ ) fprintf( stderr, "%s, %f\n", seq1[i], eff1[i] );
+// for( i=0; i<jcyc; i++ ) fprintf( stderr, "%s, %f\n", seq2[i], eff2[i] );
- gt1bk = AllocateCharVec( lgth1+lgth2+1 );
- gt2bk = AllocateCharVec( lgth1+lgth2+1 );
-#if 0
- for( i=0; i<lgth1; i++ )
- {
- fprintf( stderr, "lastverticalw[%d] = %f\n", i, lastverticalw[i] );
- }
-#endif
-
- if( tailgp == 1 )
- ;
- else
- {
- wm = lastverticalw[0];
- for( i=0; i<lgth1; i++ )
- {
- if( lastverticalw[i] >= wm )
- {
- wm = lastverticalw[i];
- iin = i; jin = lgth2-1;
- ijp[lgth1][lgth2] = +( lgth1 - i );
- }
- }
- for( j=0; j<lgth2; j++ )
- {
- if( lasthorizontalw[j] >= wm )
- {
- wm = lasthorizontalw[j];
- iin = lgth1-1; jin = j;
- ijp[lgth1][lgth2] = -( lgth2 - j );
- }
- }
- }
-
- for( i=0; i<lgth1+1; i++ )
- {
- ijp[i][0] = i + 1;
- }
- for( j=0; j<lgth2+1; j++ )
- {
- ijp[0][j] = -( j + 1 );
- }
-
- gaptable1 = gt1bk + lgth1+lgth2;
- *gaptable1 = 0;
- gaptable2 = gt2bk + lgth1+lgth2;
- *gaptable2 = 0;
-
- iin = lgth1; jin = lgth2;
- for( k=0; k<=lgth1+lgth2; k++ )
- {
- if( ijp[iin][jin] < 0 )
- {
- ifi = iin-1; jfi = jin+ijp[iin][jin];
- }
- else if( ijp[iin][jin] > 0 )
- {
- ifi = iin-ijp[iin][jin]; jfi = jin-1;
- }
- else
- {
- ifi = iin-1; jfi = jin-1;
- }
- l = iin - ifi;
- while( --l )
- {
- *--gaptable1 = 'o';
- *--gaptable2 = '-';
- k++;
- }
- l= jin - jfi;
- while( --l )
- {
- *--gaptable1 = '-';
- *--gaptable2 = 'o';
- k++;
- }
- if( iin <= 0 || jin <= 0 ) break;
- *--gaptable1 = 'o';
- *--gaptable2 = 'o';
- k++;
- iin = ifi; jin = jfi;
- }
-
- for( i=0; i<icyc; i++ ) gapireru( mseq1[i], seq1[i], gaptable1 );
- for( j=0; j<jcyc; j++ ) gapireru( mseq2[j], seq2[j], gaptable2 );
-
- free( gt1bk );
- free( gt2bk );
-
- return( 0.0 );
-}
-
-float A__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
-/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
-{
-// int k;
- register int i, j;
- int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
- int lgth1, lgth2;
- int resultlen;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
-// float fpenalty = (float)penalty;
-#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
-#endif
-#if 1
- float *wtmp;
- int *ijppt;
- float *mjpt, *prept, *curpt;
- int *mpjpt;
-#endif
- static TLS float mi, *m;
- static TLS int **ijp;
- static TLS int mpi, *mp;
- static TLS float *w1, *w2;
- static TLS float *match;
- static TLS float *initverticalw; /* kufuu sureba iranai */
- static TLS float *lastverticalw; /* kufuu sureba iranai */
- static TLS char **mseq1;
- static TLS char **mseq2;
- static TLS char **mseq;
- static TLS float *ogcp1;
- static TLS float *ogcp2;
- static TLS float *fgcp1;
- static TLS float *fgcp2;
- static TLS float **cpmx1;
- static TLS float **cpmx2;
- static TLS int **intwork;
- static TLS float **floatwork;
- static TLS int orlgth1 = 0, orlgth2 = 0;
- float fpenalty = (float)penalty;
- float *fgcp2pt;
- float *ogcp2pt;
- float fgcp1va;
- float ogcp1va;
-
-
- if( seq1 == NULL )
+ if( seq1 == NULL )
{
if( orlgth1 )
{
orlgth1 = 0;
orlgth2 = 0;
- imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0 );
+ imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL );
free( mseq1 );
free( mseq2 );
FreeCharMtx( mseq );
FreeFloatVec( ogcp1 );
+ FreeFloatVec( ogcp1o );
FreeFloatVec( ogcp2 );
FreeFloatVec( fgcp1 );
+ FreeFloatVec( fgcp1o );
FreeFloatVec( fgcp2 );
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
- FreeFloatMtx( floatwork );
+ FreeFloatVec( gapfreq1 );
+ FreeFloatVec( gapfreq2 );
+
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
}
return( 0.0 );
}
+
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
-#if 1
+#if 0
if( lgth1 == 0 || lgth2 == 0 )
{
fprintf( stderr, "WARNING (Aalignmm): lgth1=%d, lgth2=%d\n", lgth1, lgth2 );
}
+#endif
if( lgth1 == 0 && lgth2 == 0 )
return( 0.0 );
{
j = lgth2;
seq1[i][j] = 0;
- while( j ) seq1[i][--j] = '-';
+ while( j ) seq1[i][--j] = *newgapstr;
// fprintf( stderr, "seq1[i] = %s\n", seq1[i] );
}
return( 0.0 );
{
j = lgth1;
seq2[i][j] = 0;
- while( j ) seq2[i][--j] = '-';
+ while( j ) seq2[i][--j] = *newgapstr;
// fprintf( stderr, "seq2[i] = %s\n", seq2[i] );
}
return( 0.0 );
}
-#endif
+
+ warpbase = lgth1 + lgth2;
+ warpis = NULL;
+ warpjs = NULL;
+ warpn = 0;
+
+
+
+ if( trywarp )
+ {
+// fprintf( stderr, "IN A__align, penalty_shift = %d\n", penalty_shift );
+ if( headgp == 0 || tailgp == 0 )
+ {
+ fprintf( stderr, "At present, headgp and tailgp must be 1 to allow shift.\n" );
+ exit( 1 );
+ }
+ wmrecords = AllocateFloatVec( lgth2+1 );
+ warpi = AllocateIntVec( lgth2+1 );
+ warpj = AllocateIntVec( lgth2+1 );
+ prevwmrecords = AllocateFloatVec( lgth2+1 );
+ prevwarpi = AllocateIntVec( lgth2+1 );
+ prevwarpj = AllocateIntVec( lgth2+1 );
+ for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
+ }
#if 0
{
int ll1, ll2;
+
if( orlgth1 > 0 && orlgth2 > 0 )
{
FreeFloatVec( w1 );
FreeCharMtx( mseq );
FreeFloatVec( ogcp1 );
+ FreeFloatVec( ogcp1o );
FreeFloatVec( ogcp2 );
FreeFloatVec( fgcp1 );
+ FreeFloatVec( fgcp1o );
FreeFloatVec( fgcp2 );
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
- FreeFloatMtx( floatwork );
+ FreeFloatVec( gapfreq1 );
+ FreeFloatVec( gapfreq2 );
+
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
}
mseq = AllocateCharMtx( njob, ll1+ll2 );
ogcp1 = AllocateFloatVec( ll1+2 );
+ ogcp1o = AllocateFloatVec( ll1+2 );
ogcp2 = AllocateFloatVec( ll2+2 );
fgcp1 = AllocateFloatVec( ll1+2 );
+ fgcp1o = AllocateFloatVec( ll1+2 );
fgcp2 = AllocateFloatVec( ll2+2 );
- cpmx1 = AllocateFloatMtx( 26, ll1+2 );
- cpmx2 = AllocateFloatMtx( 26, ll2+2 );
+ cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
+ cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
+ previousfirstlen = -1;
+ previousicyc = -1;
+
+ gapfreq1 = AllocateFloatVec( ll1+2 );
+ gapfreq2 = AllocateFloatVec( ll2+2 );
#if FASTMATCHCALC
- floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, 26 );
- intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, 27 );
+ doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets+1 );
#else
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
#endif
#if DEBUG
#if 0
{
- float t = 0.0;
+ double t = 0.0;
for( i=0; i<icyc; i++ )
t += eff1[i];
fprintf( stderr, "## totaleff = %f\n", t );
}
#endif
- cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc );
+ if( calledbyfulltreebase && previouscall && firstmem >= 0 && firstmem == previousfirstmem && lgth1 == previousfirstlen && nthread == 0 && icyc == previousicyc+1 ) // mouhitotsu jouken ga iru. firstmem >= 0 && firstmem == previousfirstmem && calledbytreebase && prevcalledbytreebase
+ reuseprofiles = 1;
+ else
+ reuseprofiles = 0;
+
+
+ if( reuseprofiles )
+ {
+// reporterr( "r" );
+ cpmx_calc_add( seq1, cpmx1, eff1, lgth1, icyc );
+ }
+ else
+ {
+// reporterr( "n" );
+ cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc );
+ }
cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc );
if( sgap1 )
{
- new_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1, sgap1 );
+ new_OpeningGapCount( ogcp1o, icyc, seq1, eff1, lgth1, sgap1 );
+ new_FinalGapCount( fgcp1o, icyc, seq1, eff1, lgth1, egap1 );
+
new_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 );
- new_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap1 );
new_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 );
+
+ outgapcount( &headgapfreq1, icyc, sgap1, eff1 );
+ outgapcount( &headgapfreq2, jcyc, sgap2, eff2 );
+ outgapcount( gapfreq1+lgth1, icyc, egap1, eff1 );
+ outgapcount( gapfreq2+lgth2, jcyc, egap2, eff2 );
}
else
{
- st_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1 );
+ if( reuseprofiles )
+ {
+ st_OpeningGapAdd( ogcp1o, icyc, seq1, eff1, lgth1 );
+ st_FinalGapAdd( fgcp1o, icyc, seq1, eff1, lgth1 );
+ }
+ else
+ {
+ st_OpeningGapCount( ogcp1o, icyc, seq1, eff1, lgth1 );
+ st_FinalGapCount( fgcp1o, icyc, seq1, eff1, lgth1 );
+ }
+
st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 );
- st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 );
st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 );
+
+ headgapfreq1 = 0.0;
+ headgapfreq2 = 0.0;
+ gapfreq1[lgth1] = 0.0;
+ gapfreq2[lgth2] = 0.0;
}
+ if( legacygapcost == 0 )
+ {
+ if( reuseprofiles )
+ gapcountadd( gapfreq1, seq1, icyc, eff1, lgth1 );
+ else
+ gapcountf( gapfreq1, seq1, icyc, eff1, lgth1 );
+
+ gapcountf( gapfreq2, seq2, jcyc, eff2, lgth2 );
+
+ for( i=0; i<lgth1+1; i++ ) gapfreq1[i] = 1.0 - gapfreq1[i];
+ for( i=0; i<lgth2+1; i++ ) gapfreq2[i] = 1.0 - gapfreq2[i];
+ headgapfreq1 = 1.0 - headgapfreq1;
+ headgapfreq2 = 1.0 - headgapfreq2;
+ }
+ else
+ {
+ for( i=0; i<lgth1+1; i++ ) gapfreq1[i] = 1.0;
+ for( i=0; i<lgth2+1; i++ ) gapfreq2[i] = 1.0;
+ headgapfreq1 = 1.0;
+ headgapfreq2 = 1.0;
+ }
+
+#if 0
+ fprintf( stderr, "\ngapfreq1[] =" );
+ for( i=0; i<lgth1; i++ ) fprintf( stderr, "%5.2f ", gapfreq1[i] );
+ fprintf( stderr, "\n" );
+
+ fprintf( stderr, "\ngapfreq2[] =" );
+ for( i=0; i<lgth2; i++ ) fprintf( stderr, "%5.2f ", gapfreq2[i] );
+ fprintf( stderr, "\n" );
+#endif
+
+
for( i=0; i<lgth1; i++ )
{
- ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty;
- fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty;
+ ogcp1[i] = 0.5 * ( 1.0 - ogcp1o[i] ) * fpenalty * ( gapfreq1[i] );
+ fgcp1[i] = 0.5 * ( 1.0 - fgcp1o[i] ) * fpenalty * ( gapfreq1[i] );
}
+
for( i=0; i<lgth2; i++ )
{
- ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty;
- fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty;
+ ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty * ( gapfreq2[i] );
+ fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty * ( gapfreq2[i] );
}
#if 0
for( i=0; i<lgth1; i++ )
currentw = w1;
previousw = w2;
- match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, floatwork, intwork, 1 );
+ match_calc( n_dynamicmtx, initverticalw, cpmx2, cpmx1, 0, lgth1, doublework, intwork, 1 );
if( localhom )
imp_match_out_vead_tate( initverticalw, 0, lgth1 ); // 060306
- match_calc( currentw, cpmx1, cpmx2, 0, lgth2, floatwork, intwork, 1 );
+ match_calc( n_dynamicmtx, currentw, cpmx1, cpmx2, 0, lgth2, doublework, intwork, 1 );
if( localhom )
imp_match_out_vead( currentw, 0, lgth2 ); // 060306
#if 0 // -> tbfast.c
if( localhom )
- imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 );
+ imp_match_calc( n_dynamicmtx, currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 );
#endif
{
for( i=1; i<lgth1+1; i++ )
{
- initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+// initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+ initverticalw[i] += ( ogcp1[0] * headgapfreq2 + fgcp1[i-1] * gapfreq2[0] ) ;
}
for( j=1; j<lgth2+1; j++ )
{
- currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+// currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+ currentw[j] += ( ogcp2[0] * headgapfreq1 + fgcp2[j-1] * gapfreq1[0] ) ;
}
}
#if OUTGAP0TRY
else
{
+ fprintf( stderr, "offset = %d\n", offset );
for( j=1; j<lgth2+1; j++ )
currentw[j] -= offset * j / 2.0;
for( i=1; i<lgth1+1; i++ )
initverticalw[i] -= offset * i / 2.0;
}
#endif
+#if 0
+ fprintf( stderr, "\n " );
+ for( j=0; j<lgth2+1; j++ ) fprintf( stderr, " %c ", seq2[0][j] );
+ fprintf( stderr, "\n%c ", seq1[0][0] );
+ for( j=0; j<lgth2+1; j++ )
+ {
+ fprintf( stderr, "%5.0f ", currentw[j] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+
for( j=1; j<lgth2+1; ++j )
{
- m[j] = currentw[j-1] + ogcp1[1]; mp[j] = 0;
+// m[j] = currentw[j-1] + ogcp1[1]; mp[j] = 0;
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2[j-1]; mp[j] = 0;;
}
if( lgth2 == 0 )
lastverticalw[0] = 0.0; // Falign kara yobaretatoki kounarukanousei ari
lastverticalw[0] = currentw[lgth2-1];
if( tailgp ) lasti = lgth1+1; else lasti = lgth1;
+ lastj = lgth2+1;
#if XXXXXXX
fprintf( stderr, "currentw = \n" );
previousw[0] = initverticalw[i-1];
- match_calc( currentw, cpmx1, cpmx2, i, lgth2, floatwork, intwork, 0 );
+ match_calc( n_dynamicmtx, currentw, cpmx1, cpmx2, i, lgth2, doublework, intwork, 0 );
#if XXXXXXX
fprintf( stderr, "\n" );
fprintf( stderr, "i=%d\n", i );
#endif
currentw[0] = initverticalw[i];
-
- mi = previousw[0] + ogcp2[1]; mpi = 0;
+#if 0
+ fprintf( stderr, "%c ", seq1[0][i] );
+ for( j=0; j<lgth2+1; j++ )
+ {
+ fprintf( stderr, "%5.0f ", currentw[j] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+
+// mi = previousw[0] + ogcp2[1]; mpi = 0;
+ mi = previousw[0] + ogcp2[1] * gapfreq1[i-1]; mpi=0;
ijppt = ijp[i] + 1;
mjpt = m + 1;
prept = previousw;
ogcp2pt = ogcp2 + 1;
fgcp1va = fgcp1[i-1];
ogcp1va = ogcp1[i];
- lastj = lgth2+1;
+ gf1va = gapfreq1[i];
+ gf1vapre = gapfreq1[i-1];
+ gf2pt = gapfreq2+1;
+ gf2ptpre = gapfreq2;
+
+ if( trywarp )
+ {
+ prevwmrecordspt = prevwmrecords;
+ wmrecordspt = wmrecords+1;
+ wmrecords1pt = wmrecords;
+ warpipt = warpi + 1;
+ warpjpt = warpj + 1;
+ }
+
+
for( j=1; j<lastj; j++ )
{
#ifdef xxxenablemultithread
*ijppt = 0;
#if 0
+ fprintf( stderr, "\n i=%d, j=%d %c, %c", i, j, seq1[0][i], seq2[0][j] );
fprintf( stderr, "%5.0f->", wm );
+ fprintf( stderr, "%5.0f? (penal=%5.2f)", g=mi+*fgcp2pt*(1.0-gapfreq1[i]), *fgcp2pt*(1.0-gapfreq1[i]) );
#endif
-#if 0
- fprintf( stderr, "%5.0f?", g );
-#endif
- if( (g=mi+*fgcp2pt) > wm )
+ if( (g=mi+*fgcp2pt*gf1va) > wm )
{
wm = g;
*ijppt = -( j - mpi );
+// fprintf( stderr, "Jump to %d (%c)!", mpi, seq2[0][mpi] );
}
- if( (g=*prept+*ogcp2pt) >= mi )
+ if( (g=*prept+*ogcp2pt*gf1vapre) >= mi )
{
mi = g;
mpi = j-1;
#endif
#if 0
- fprintf( stderr, "%5.0f?", g );
+ fprintf( stderr, "%5.0f->", wm );
+ fprintf( stderr, "%5.0f? (penal=%5.2f)", g=*mjpt+fgcp1va*(1.0-gapfreq2[j]), fgcp1va*(1.0-gapfreq2[j]) );
#endif
- if( (g=*mjpt+fgcp1va) > wm )
+ if( (g=*mjpt+ fgcp1va* *gf2pt) > wm )
{
wm = g;
*ijppt = +( i - *mpjpt );
+// fprintf( stderr, "Jump to %d (%c)!", *mpjpt, seq1[0][*mpjpt] );
}
- if( (g=*prept+ogcp1va) >= *mjpt )
+ if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt )
{
*mjpt = g;
*mpjpt = i-1;
m[j] += fpenalty_ex;
#endif
+
+ if( trywarp )
+ {
+#if USE_PENALTY_EX
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai
+#else
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai
+#endif
+ {
+// fprintf( stderr, "WARP!!\n" );
+ if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+ {
+ *ijppt = warpbase + warpn - 1;
+ }
+ else
+ {
+ *ijppt = warpbase + warpn;
+ warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+ warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+ warpis[warpn] = prevwarpi[j-1];
+ warpjs[warpn] = prevwarpj[j-1];
+ warpn++;
+ }
+ wm = g;
+ }
+
#if 0
- fprintf( stderr, "%5.0f ", wm );
+ fprintf( stderr, "%5.0f ", wm );
#endif
+ curm = *curpt + wm;
+
+ if( *wmrecords1pt > *wmrecordspt )
+ {
+ *wmrecordspt = *wmrecords1pt;
+ *warpipt = *(warpipt-1);
+ *warpjpt = *(warpjpt-1);
+ }
+ if( curm > *wmrecordspt )
+ {
+ *wmrecordspt = curm;
+ *warpipt = i;
+ *warpjpt = j;
+ }
+ wmrecordspt++;
+ wmrecords1pt++;
+ warpipt++;
+ warpjpt++;
+ }
+
*curpt++ += wm;
ijppt++;
mjpt++;
mpjpt++;
fgcp2pt++;
ogcp2pt++;
+ gf2ptpre++;
+ gf2pt++;
}
lastverticalw[i] = currentw[lgth2-1];
+
+ if( trywarp )
+ {
+ fltncpy( prevwmrecords, wmrecords, lastj );
+ intncpy( prevwarpi, warpi, lastj );
+ intncpy( prevwarpj, warpj, lastj );
+ }
}
-// fprintf( stderr, "wm = %f\n", wm );
+ if( trywarp )
+ {
+// fprintf( stderr, "wm = %f\n", wm );
+// fprintf( stderr, "warpn = %d\n", warpn );
+ free( wmrecords );
+ free( prevwmrecords );
+ free( warpi );
+ free( warpj );
+ free( prevwarpi );
+ free( prevwarpj );
+ }
#if OUTGAP0TRY
if( !outgap )
*/
if( localhom )
{
- Atracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp, icyc, jcyc );
+ Atracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, warpis, warpjs, warpbase, &ngap1, &ngap2, reuseprofiles );
}
else
- Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp, icyc, jcyc, tailgp );
+ Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, tailgp, warpis, warpjs, warpbase, &ngap1, &ngap2, reuseprofiles );
+
+// reporterr( "reuseprofiles after Atracking = %d\n", reuseprofiles );
+
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
// fprintf( stderr, "### impmatch = %f\n", *impmatch );
}
- for( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );
- for( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );
+ if( ngap1 || !reuseprofiles )
+ for( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );
+ if( ngap2 || !reuseprofiles )
+ for( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );
+
#if 0
fprintf( stderr, "\n" );
for( i=0; i<icyc; i++ ) fprintf( stderr, "%s\n", mseq1[i] );
for( j=0; j<jcyc; j++ ) fprintf( stderr, "%s\n", mseq2[j] );
#endif
-// fprintf( stderr, "wm = %f\n", wm );
+// fprintf( stdout, "firstmem=%d, icyc=%d, jcyc=%d, wm = %f\n", firstmem, icyc, jcyc, wm );
+
+// fprintf( stderr, "lgth1 = %d\n", lgth1 );
+// fprintf( stderr, "-> %d\n", strlen( seq1[0] ) );
+ previousfirstlen = lgth1;
+ previousfirstmem = firstmem;
+ previousicyc = icyc;
+ previouscall = calledbyfulltreebase;
return( wm );
}
-float A__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int *gapmap1, int *gapmap2 )
+double A__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int *gapmap1, int *gapmap2 )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
+ fprintf( stderr, "Unexpected error. Please contact kazutaka.katoh@aist.go.jp\n" );
+ exit( 1 );
+}
+
+
+double A__align_variousdist( int **which, double ***matrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp )
+/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
+{
+
+
// int k;
- register int i, j;
- int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
+ register int i, j, c;
+ int ngap1, ngap2;
+ int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
int lgth1, lgth2;
int resultlen;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
-// float fpenalty = (float)penalty;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
+// double fpenalty = (double)penalty;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
+ double fpenalty_ex = (double)penalty_ex;
#endif
#if 1
- float *wtmp;
+ double *wtmp;
int *ijppt;
- float *mjpt, *prept, *curpt;
+ double *mjpt, *prept, *curpt;
int *mpjpt;
#endif
- static TLS float mi, *m;
+ static TLS double mi, *m;
static TLS int **ijp;
static TLS int mpi, *mp;
- static TLS float *w1, *w2;
- static TLS float *match;
- static TLS float *initverticalw; /* kufuu sureba iranai */
- static TLS float *lastverticalw; /* kufuu sureba iranai */
+ static TLS double *w1, *w2;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
static TLS char **mseq1;
static TLS char **mseq2;
static TLS char **mseq;
- static TLS float *ogcp1;
- static TLS float *ogcp2;
- static TLS float *fgcp1;
- static TLS float *fgcp2;
- static TLS float **cpmx1;
- static TLS float **cpmx2;
- static TLS int **intwork;
- static TLS float **floatwork;
+ static TLS double *ogcp1;
+ static TLS double *ogcp2;
+ static TLS double *fgcp1;
+ static TLS double *fgcp2;
+ static TLS double ***cpmx1s;
+ static TLS double ***cpmx2s;
+ static TLS int ***intwork;
+ static TLS double ***doublework;
static TLS int orlgth1 = 0, orlgth2 = 0;
- float *fgcp2pt;
- float *ogcp2pt;
- float fgcp1va;
- float ogcp1va;
+ static TLS double *gapfreq1;
+ static TLS double *gapfreq2;
+ double fpenalty = (double)penalty;
+ double fpenalty_shift = (double)penalty_shift;
+ double *fgcp2pt;
+ double *ogcp2pt;
+ double fgcp1va;
+ double ogcp1va;
+ double *gf2pt;
+ double *gf2ptpre;
+ double gf1va;
+ double gf1vapre;
+ double headgapfreq1;
+ double headgapfreq2;
+
+ int *warpis = NULL;
+ int *warpjs = NULL;
+ int *warpi = NULL;
+ int *warpj = NULL;
+ int *prevwarpi = NULL;
+ int *prevwarpj = NULL;
+ double *wmrecords = NULL;
+ double *prevwmrecords = NULL;
+ int warpn = 0;
+ int warpbase;
+ double curm = 0.0;
+ double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+ int *warpipt, *warpjpt;
+ int *nmask, **masklist1, **masklist2;
-#if 0
- fprintf( stderr, "eff in SA+++align\n" );
- for( i=0; i<icyc; i++ ) fprintf( stderr, "eff1[%d] = %f\n", i, eff1[i] );
-#endif
- if( orlgth1 == 0 )
+ if( seq1 == NULL )
{
- mseq1 = AllocateCharMtx( njob, 0 );
- mseq2 = AllocateCharMtx( njob, 0 );
+ if( orlgth1 )
+ {
+// fprintf( stderr, "## Freeing local arrays in A__align\n" );
+ orlgth1 = 0;
+ orlgth2 = 0;
+
+ imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL );
+
+ free( mseq1 );
+ free( mseq2 );
+ FreeFloatVec( w1 );
+ FreeFloatVec( w2 );
+ FreeFloatVec( match );
+ FreeFloatVec( initverticalw );
+ FreeFloatVec( lastverticalw );
+
+ FreeFloatVec( m );
+ FreeIntVec( mp );
+
+ FreeCharMtx( mseq );
+
+ FreeFloatVec( ogcp1 );
+ FreeFloatVec( ogcp2 );
+ FreeFloatVec( fgcp1 );
+ FreeFloatVec( fgcp2 );
+
+
+ FreeFloatCub( cpmx1s );
+ FreeFloatCub( cpmx2s );
+
+ FreeFloatVec( gapfreq1 );
+ FreeFloatVec( gapfreq2 );
+
+ FreeFloatCub( doublework );
+ FreeIntCub( intwork );
+
+ }
+ else
+ {
+// fprintf( stderr, "## Not allocated\n" );
+ }
+ return( 0.0 );
}
+#if SLOW
+ nmask = calloc( maxdistclass, sizeof( int ) );
+#else
+ masklist1 = AllocateIntMtx( maxdistclass, 0 );
+ masklist2 = AllocateIntMtx( maxdistclass, 0 );
+ nmask = calloc( maxdistclass, sizeof( int ) );
+
+ for( c=0; c<maxdistclass; c++ )
+ {
+ for( i=0; i<icyc; i++ ) for( j=0; j<jcyc; j++ )
+ {
+ if( eff1s[c][i] * eff2s[c][j] != 0.0 )
+ {
+
+ if( c != which[i][j] )
+ {
+ masklist1[c] = realloc( masklist1[c], sizeof( int ) * (nmask[c]+1) );
+ masklist2[c] = realloc( masklist2[c], sizeof( int ) * (nmask[c]+1) );
+
+ masklist1[c][nmask[c]] = i;
+ masklist2[c][nmask[c]] = j;
+ nmask[c]++;
+ }
+ }
+ }
+ }
+ for( c=0; c<maxdistclass; c++ ) if( nmask[c] ) break;
+ if( c<maxdistclass ) reporterr( "Found a complex grouping. This step may be a bit slow.\n" );
+#endif
+
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
#if 0
if( lgth1 == 0 || lgth2 == 0 )
{
- fprintf( stderr, "WARNING (Aalign_gapmap): lgth1=%d, lgth2=%d\n", lgth1, lgth2 );
+ fprintf( stderr, "WARNING (Aalignmm): lgth1=%d, lgth2=%d\n", lgth1, lgth2 );
+ }
+#endif
+ if( lgth1 == 0 && lgth2 == 0 )
+ return( 0.0 );
+
+ if( lgth1 == 0 )
+ {
+ for( i=0; i<icyc; i++ )
+ {
+ j = lgth2;
+ seq1[i][j] = 0;
+ while( j ) seq1[i][--j] = *newgapstr;
+// fprintf( stderr, "seq1[i] = %s\n", seq1[i] );
+ }
+ return( 0.0 );
+ }
+
+ if( lgth2 == 0 )
+ {
+ for( i=0; i<jcyc; i++ )
+ {
+ j = lgth1;
+ seq2[i][j] = 0;
+ while( j ) seq2[i][--j] = *newgapstr;
+// fprintf( stderr, "seq2[i] = %s\n", seq2[i] );
+ }
+ return( 0.0 );
+ }
+
+ warpbase = lgth1 + lgth2;
+ warpis = NULL;
+ warpjs = NULL;
+ warpn = 0;
+
+ if( trywarp )
+ {
+// fprintf( stderr, "In A__align_variousdist !!!!!\n" );
+ if( headgp == 0 || tailgp == 0 )
+ {
+ fprintf( stderr, "At present, headgp and tailgp must be 1.\n" );
+ exit( 1 );
+ }
+ wmrecords = AllocateFloatVec( lgth2+1 );
+ warpi = AllocateIntVec( lgth2+1 );
+ warpj = AllocateIntVec( lgth2+1 );
+ prevwmrecords = AllocateFloatVec( lgth2+1 );
+ prevwarpi = AllocateIntVec( lgth2+1 );
+ prevwarpj = AllocateIntVec( lgth2+1 );
+ for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
}
+
+#if 0
+ fprintf( stderr, "#### eff in SA+++align\n" );
+ fprintf( stderr, "#### seq1[0] = %s\n", seq1[0] );
+ fprintf( stderr, "#### strlen( seq1[0] ) = %d\n", strlen( seq1[0] ) );
+ for( i=0; i<icyc; i++ ) fprintf( stderr, "eff1[%d] = %f\n", i, eff1[i] );
+ fprintf( stderr, "#### seq2[0] = %s\n", seq2[0] );
+ fprintf( stderr, "#### strlen( seq2[0] ) = %d\n", strlen( seq2[0] ) );
+ for( i=0; i<jcyc; i++ ) fprintf( stderr, "eff2[%d] = %f\n", i, eff2[i] );
#endif
+ if( orlgth1 == 0 )
+ {
+ mseq1 = AllocateCharMtx( njob, 0 );
+ mseq2 = AllocateCharMtx( njob, 0 );
+ }
+
+
if( lgth1 > orlgth1 || lgth2 > orlgth2 )
{
int ll1, ll2;
+
if( orlgth1 > 0 && orlgth2 > 0 )
{
FreeFloatVec( w1 );
FreeFloatVec( fgcp2 );
- FreeFloatMtx( cpmx1 );
- FreeFloatMtx( cpmx2 );
+ FreeFloatCub( cpmx1s );
+ FreeFloatCub( cpmx2s );
- FreeFloatMtx( floatwork );
- FreeIntMtx( intwork );
+ FreeFloatVec( gapfreq1 );
+ FreeFloatVec( gapfreq2 );
+
+ FreeFloatCub( doublework );
+ FreeIntCub( intwork );
}
ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
fgcp1 = AllocateFloatVec( ll1+2 );
fgcp2 = AllocateFloatVec( ll2+2 );
- cpmx1 = AllocateFloatMtx( 26, ll1+2 );
- cpmx2 = AllocateFloatMtx( 26, ll2+2 );
+ cpmx1s = AllocateFloatCub( maxdistclass, nalphabets, ll1+2 );
+ cpmx2s = AllocateFloatCub( maxdistclass, nalphabets, ll2+2 );
-#if FASTMATCHCALC
- floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, 26 );
- intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, 26 );
-#else
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 27, MAX( ll1, ll2 )+2 );
-#endif
+ gapfreq1 = AllocateFloatVec( ll1+2 );
+ gapfreq2 = AllocateFloatVec( ll2+2 );
+
+ doublework = AllocateFloatCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets+1 );
#if DEBUG
fprintf( stderr, "succeeded\n" );
}
ijp = commonIP;
- cpmx_calc_new( seq1, cpmx1, eff1, strlen( seq1[0] ), icyc );
- cpmx_calc_new( seq2, cpmx2, eff2, strlen( seq2[0] ), jcyc );
+#if 0
+ {
+ double t = 0.0;
+ for( i=0; i<icyc; i++ )
+ t += eff1[i];
+ fprintf( stderr, "## totaleff = %f\n", t );
+ }
+#endif
+
+#if SLOW
+#else
+// cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc );
+// cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc );
+ for( c=0; c<maxdistclass; c++ )
+ {
+ cpmx_calc_new( seq1, cpmx1s[c], eff1s[c], lgth1, icyc );
+ cpmx_calc_new( seq2, cpmx2s[c], eff2s[c], lgth2, jcyc );
+ }
+#endif
+
+ if( sgap1 )
+ {
+ new_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1, sgap1 );
+ new_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 );
+ new_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap1 );
+ new_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 );
+ outgapcount( &headgapfreq1, icyc, sgap1, eff1 );
+ outgapcount( &headgapfreq2, jcyc, sgap2, eff2 );
+ outgapcount( gapfreq1+lgth1, icyc, egap1, eff1 );
+ outgapcount( gapfreq2+lgth2, jcyc, egap2, eff2 );
+ }
+ else
+ {
+ st_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1 );
+ st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 );
+ st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 );
+ st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 );
+ headgapfreq1 = 0.0;
+ headgapfreq2 = 0.0;
+ gapfreq1[lgth1] = 0.0;
+ gapfreq2[lgth2] = 0.0;
+ }
+
+ if( legacygapcost == 0 )
+ {
+ gapcountf( gapfreq1, seq1, icyc, eff1, lgth1 );
+ gapcountf( gapfreq2, seq2, jcyc, eff2, lgth2 );
+ for( i=0; i<lgth1+1; i++ ) gapfreq1[i] = 1.0 - gapfreq1[i];
+ for( i=0; i<lgth2+1; i++ ) gapfreq2[i] = 1.0 - gapfreq2[i];
+ headgapfreq1 = 1.0 - headgapfreq1;
+ headgapfreq2 = 1.0 - headgapfreq2;
+ }
+ else
+ {
+ for( i=0; i<lgth1+1; i++ ) gapfreq1[i] = 1.0;
+ for( i=0; i<lgth2+1; i++ ) gapfreq2[i] = 1.0;
+ headgapfreq1 = 1.0;
+ headgapfreq2 = 1.0;
+ }
- st_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1 );
- st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 );
- st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 );
- st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 );
+#if 0
+ fprintf( stderr, "\ngapfreq1[] =" );
+ for( i=0; i<lgth1; i++ ) fprintf( stderr, "%5.2f ", gapfreq1[i] );
+ fprintf( stderr, "\n" );
+
+ fprintf( stderr, "\ngapfreq2[] =" );
+ for( i=0; i<lgth2; i++ ) fprintf( stderr, "%5.2f ", gapfreq2[i] );
+ fprintf( stderr, "\n" );
+#endif
+
for( i=0; i<lgth1; i++ )
{
- ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] );
- fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] );
+ ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty * ( gapfreq1[i] );
+ fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty * ( gapfreq1[i] );
}
+
for( i=0; i<lgth2; i++ )
{
- ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] );
- fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] );
+ ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty * ( gapfreq2[i] );
+ fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty * ( gapfreq2[i] );
}
#if 0
for( i=0; i<lgth1; i++ )
currentw = w1;
previousw = w2;
+// for( i=0; i<icyc; i++ ) fprintf( stderr, "seq1[i] = %s\n", seq1[i] );
+// for( j=0; j<jcyc; j++ ) fprintf( stderr, "seq2[j] = %s\n", seq2[j] );
+
+#if SLOW
+ match_calc_slow( which, matrices, initverticalw, jcyc, seq2, eff2, icyc, seq1, eff1, 0, lgth1, *doublework, *intwork, 1, 1 );
+#else
+ fillzero( initverticalw, lgth1 );
+ for( c=0; c<maxdistclass; c++ )
+ {
+// fprintf( stderr, "c=%d matrices[c][W][W] = %f\n", c, matrices[c][amino_n['W']][amino_n['W']] );
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "seq1[i] = %c, cpmx1s[c][3][%d] = %f\n", seq1[0][i], i, cpmx1s[c][3][i] );
+// for( i=0; i<lgth2; i++ ) fprintf( stderr, "seq2[i] = %c, cpmx2s[c][3][%d] = %f\n", seq2[0][i], i, cpmx2s[c][3][i] );
+ match_calc_add( matrices[c], initverticalw, cpmx2s[c], cpmx1s[c], 0, lgth1, doublework[c], intwork[c], 1 );
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "c=%d, %d - %f\n", c, i, initverticalw[i] );
+ if( nmask[c] ) match_calc_del( which, matrices, initverticalw, jcyc, seq2, eff2, icyc, seq1, eff1, 0, lgth1, c, nmask[c], masklist2[c], masklist1[c] );
+ }
+// for( i=0; i<lgth1; i++ ) fprintf( stderr, "%d - %f\n", i, initverticalw[i] );
+#endif
+
+// exit( 1 );
- match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, floatwork, intwork, 1 );
if( localhom )
- imp_match_out_vead_tate_gapmap( initverticalw, gapmap2[0], lgth1, gapmap1 ); // 060306
+ imp_match_out_vead_tate( initverticalw, 0, lgth1 ); // 060306
+#if SLOW
+ match_calc_slow( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, 0, lgth2, *doublework, *intwork, 1, 0 );
+// for( i=0; i<lgth2; i++ ) fprintf( stderr, "%d - %f\n", i, currentw[i] );
+// exit( 1 );
+#else
+ fillzero( currentw, lgth2 );
+ for( c=0; c<maxdistclass; c++ )
+ {
+ match_calc_add( matrices[c], currentw, cpmx1s[c], cpmx2s[c], 0, lgth2, doublework[c], intwork[c], 1 );
+ if( nmask[c] ) match_calc_del( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, 0, lgth2, c, nmask[c], masklist1[c], masklist2[c] );
+ }
+// for( i=0; i<lgth2; i++ ) fprintf( stderr, "%d - %f\n", i, currentw[i] );
+// exit( 1 );
+#endif
- match_calc( currentw, cpmx1, cpmx2, 0, lgth2, floatwork, intwork, 1 );
if( localhom )
- imp_match_out_vead_gapmap( currentw, gapmap1[0], lgth2, gapmap2 ); // 060306
+ imp_match_out_vead( currentw, 0, lgth2 ); // 060306
#if 0 // -> tbfast.c
if( localhom )
- imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 );
+ imp_match_calc( n_dynamicmtx, currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 );
#endif
- if( 1 ) // tsuneni outgap=1
+ if( headgp == 1 )
{
for( i=1; i<lgth1+1; i++ )
{
- initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+// initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+ initverticalw[i] += ( ogcp1[0] * headgapfreq2 + fgcp1[i-1] * gapfreq2[0] ) ;
}
for( j=1; j<lgth2+1; j++ )
{
- currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+// currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+ currentw[j] += ( ogcp2[0] * headgapfreq1 + fgcp2[j-1] * gapfreq1[0] ) ;
}
}
#if OUTGAP0TRY
else
{
+ fprintf( stderr, "offset = %d\n", offset );
for( j=1; j<lgth2+1; j++ )
currentw[j] -= offset * j / 2.0;
for( i=1; i<lgth1+1; i++ )
initverticalw[i] -= offset * i / 2.0;
}
#endif
+#if 0
+ fprintf( stderr, "\n " );
+ for( j=0; j<lgth2+1; j++ ) fprintf( stderr, " %c ", seq2[0][j] );
+ fprintf( stderr, "\n%c ", seq1[0][0] );
+ for( j=0; j<lgth2+1; j++ )
+ {
+ fprintf( stderr, "%5.0f ", currentw[j] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+
for( j=1; j<lgth2+1; ++j )
{
- m[j] = currentw[j-1] + ogcp1[1]; mp[j] = 0;
+// m[j] = currentw[j-1] + ogcp1[1]; mp[j] = 0;
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2[j-1]; mp[j] = 0;;
}
-
if( lgth2 == 0 )
- lastverticalw[0] = 0.0;
+ lastverticalw[0] = 0.0; // Falign kara yobaretatoki kounarukanousei ari
else
lastverticalw[0] = currentw[lgth2-1];
- if( 1 ) lasti = lgth1+1; else lasti = lgth1; // tsuneni outgap=1
+ if( tailgp ) lasti = lgth1+1; else lasti = lgth1;
+ lastj = lgth2+1;
#if XXXXXXX
fprintf( stderr, "currentw = \n" );
for( i=1; i<lasti; i++ )
{
+
+#ifdef enablemultithread
+// fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+ if( chudanpt && *chudanpt != chudanref )
+ {
+// fprintf( stderr, "\n\n## CHUUDAN!!! S\n" );
+ if( masklist1 ) freeintmtx( masklist1, maxdistclass ); masklist1 = NULL;
+ if( masklist2 ) freeintmtx( masklist2, maxdistclass ); masklist2 = NULL;
+ if( nmask ) free( nmask ); nmask = NULL;
+ *chudanres = 1;
+ return( -1.0 );
+ }
+#endif
wtmp = previousw;
previousw = currentw;
currentw = wtmp;
previousw[0] = initverticalw[i-1];
- match_calc( currentw, cpmx1, cpmx2, i, lgth2, floatwork, intwork, 0 );
+#if SLOW
+ match_calc_slow( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, i, lgth2, *doublework, *intwork, 0, 0 );
+#else
+ fillzero( currentw, lgth2 );
+ for( c=0; c<maxdistclass; c++ )
+ {
+ match_calc_add( matrices[c], currentw, cpmx1s[c], cpmx2s[c], i, lgth2, doublework[c], intwork[c], 0 );
+ if( nmask[c] ) match_calc_del( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, i, lgth2, c, nmask[c], masklist1[c], masklist2[c] );
+ }
+#endif
+#if 0
+ if( i == 1 )
+ {
+ fprintf( stderr, "\n" );
+ for( j=0; j<lgth2; j++ ) fprintf( stderr, "%d - %f\n", j, currentw[j] );
+ exit( 1 );
+ }
+#endif
+
#if XXXXXXX
fprintf( stderr, "\n" );
fprintf( stderr, "i=%d\n", i );
#if 0
imp_match_out_vead( currentw, i, lgth2 );
#else
- imp_match_out_vead_gapmap( currentw, gapmap1[i], lgth2, gapmap2 );
+ imp_match_out_vead( currentw, i, lgth2 );
#endif
}
#if XXXXXXX
#endif
currentw[0] = initverticalw[i];
-
- mi = previousw[0] + ogcp2[1]; mpi = 0;
-
+#if 0
+ fprintf( stderr, "%c ", seq1[0][i] );
+ for( j=0; j<lgth2+1; j++ )
+ {
+ fprintf( stderr, "%5.0f ", currentw[j] );
+ }
+ fprintf( stderr, "\n" );
+#endif
+
+// mi = previousw[0] + ogcp2[1]; mpi = 0;
+ mi = previousw[0] + ogcp2[1] * gapfreq1[i-1]; mpi=0;
ijppt = ijp[i] + 1;
mjpt = m + 1;
prept = previousw;
ogcp2pt = ogcp2 + 1;
fgcp1va = fgcp1[i-1];
ogcp1va = ogcp1[i];
- lastj = lgth2+1;
+ gf1va = gapfreq1[i];
+ gf1vapre = gapfreq1[i-1];
+ gf2pt = gapfreq2+1;
+ gf2ptpre = gapfreq2;
+
+ if( trywarp )
+ {
+ prevwmrecordspt = prevwmrecords;
+ wmrecordspt = wmrecords+1;
+ wmrecords1pt = wmrecords;
+ warpipt = warpi + 1;
+ warpjpt = warpj + 1;
+ }
+
for( j=1; j<lastj; j++ )
{
+#ifdef xxxenablemultithread
+// fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+ if( chudanpt && *chudanpt != chudanref )
+ {
+// fprintf( stderr, "\n\n## CHUUDAN!!! S\n" );
+ if( masklist1 ) freeintmtx( masklist1, maxdistclass ); masklist1 = NULL;
+ if( masklist2 ) freeintmtx( masklist2, maxdistclass ); masklist2 = NULL;
+ if( nmask ) free( nmask ); nmask = NULL;
+ *chudanres = 1;
+ return( -1.0 );
+ }
+#endif
wm = *prept;
*ijppt = 0;
#if 0
+ fprintf( stderr, "\n i=%d, j=%d %c, %c", i, j, seq1[0][i], seq2[0][j] );
fprintf( stderr, "%5.0f->", wm );
+ fprintf( stderr, "%5.0f? (penal=%5.2f)", g=mi+*fgcp2pt*(1.0-gapfreq1[i]), *fgcp2pt*(1.0-gapfreq1[i]) );
#endif
- g = mi + *fgcp2pt;
-#if 0
- fprintf( stderr, "%5.0f?", g );
-#endif
- if( g > wm )
+ if( (g=mi+*fgcp2pt*gf1va) > wm )
{
wm = g;
*ijppt = -( j - mpi );
+// fprintf( stderr, "Jump to %d (%c)!", mpi, seq2[0][mpi] );
}
- g = *prept + *ogcp2pt;
- if( g >= mi )
+ if( (g=*prept+*ogcp2pt*gf1vapre) >= mi )
{
mi = g;
mpi = j-1;
mi += fpenalty_ex;
#endif
- g = *mjpt + fgcp1va;
#if 0
- fprintf( stderr, "%5.0f?", g );
+ fprintf( stderr, "%5.0f->", wm );
+ fprintf( stderr, "%5.0f? (penal=%5.2f)", g=*mjpt+fgcp1va*(1.0-gapfreq2[j]), fgcp1va*(1.0-gapfreq2[j]) );
#endif
- if( g > wm )
+ if( (g=*mjpt+ fgcp1va* *gf2pt) > wm )
{
wm = g;
*ijppt = +( i - *mpjpt );
+// fprintf( stderr, "Jump to %d (%c)!", *mpjpt, seq1[0][*mpjpt] );
}
- g = *prept + ogcp1va;
- if( g >= *mjpt )
+ if( (g=*prept+ ogcp1va* *gf2ptpre) >= *mjpt )
{
*mjpt = g;
*mpjpt = i-1;
#if USE_PENALTY_EX
m[j] += fpenalty_ex;
#endif
+ if( trywarp )
+ {
+#if USE_PENALTY_EX
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai
+#else
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai
+#endif
+ {
+ if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+ {
+ *ijppt = warpbase + warpn - 1;
+ }
+ else
+ {
+ *ijppt = warpbase + warpn;
+ warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+ warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+ warpis[warpn] = prevwarpi[j-1];
+ warpjs[warpn] = prevwarpj[j-1];
+ warpn++;
+ }
+ wm = g;
+ }
+ curm = *curpt + wm;
+
+ if( *wmrecords1pt > *wmrecordspt )
+ {
+ *wmrecordspt = *wmrecords1pt;
+ *warpipt = *(warpipt-1);
+ *warpjpt = *(warpjpt-1);
+ }
+ if( curm > *wmrecordspt )
+ {
+ *wmrecordspt = curm;
+ *warpipt = i;
+ *warpjpt = j;
+ }
+ wmrecordspt++;
+ wmrecords1pt++;
+ warpipt++;
+ warpjpt++;
+ }
#if 0
fprintf( stderr, "%5.0f ", wm );
mpjpt++;
fgcp2pt++;
ogcp2pt++;
+ gf2ptpre++;
+ gf2pt++;
+
}
lastverticalw[i] = currentw[lgth2-1];
+
+ if( trywarp )
+ {
+ fltncpy( prevwmrecords, wmrecords, lastj );
+ intncpy( prevwarpi, warpi, lastj );
+ intncpy( prevwarpj, warpj, lastj );
+ }
}
+ if( trywarp )
+ {
+// fprintf( stderr, "wm = %f\n", wm );
+// fprintf( stderr, "warpn = %d\n", warpn );
+ free( wmrecords );
+ free( prevwmrecords );
+ free( warpi );
+ free( warpj );
+ free( prevwarpi );
+ free( prevwarpj );
+ }
+
#if OUTGAP0TRY
if( !outgap )
*/
if( localhom )
{
- Atracking_localhom_gapmap( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp, icyc, jcyc, gapmap1, gapmap2 );
+ Atracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, warpis, warpjs, warpbase, &ngap1, &ngap2, 0 );
}
else
- Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp, icyc, jcyc, 1 );
+ Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, tailgp, warpis, warpjs, warpbase, &ngap1, &ngap2, 0 );
+
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
// fprintf( stderr, "### impmatch = %f\n", *impmatch );
for( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );
for( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );
- /*
+#if 0
fprintf( stderr, "\n" );
for( i=0; i<icyc; i++ ) fprintf( stderr, "%s\n", mseq1[i] );
fprintf( stderr, "#####\n" );
for( j=0; j<jcyc; j++ ) fprintf( stderr, "%s\n", mseq2[j] );
- */
+#endif
// fprintf( stderr, "wm = %f\n", wm );
+
+ if( masklist1 ) freeintmtx( masklist1, maxdistclass ); masklist1 = NULL;
+ if( masklist2 ) freeintmtx( masklist2, maxdistclass ); masklist2 = NULL;
+ if( nmask ) free( nmask ); nmask = NULL;
+
return( wm );
}
-
#include "mltaln.h"
-void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap
+// Copy exactly n characters from s2 to s1, then append a terminating NUL.
+// The copy does not stop at a NUL inside s2; s1 needs room for n+1 characters.
+static void strncpy0( char *s1, char *s2, int n )
+{
+	char *dst = s1;
+	char *src = s2;
+	int left;
+
+	for( left=n; left; left-- ) *dst++ = *src++;
+	*dst = 0;
+}
+
+#if 0
+static void strncpy0x( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0b0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0b1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0b2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0n0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0n1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0n2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0a0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0a1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0a2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0o0( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0o1( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+static void strncpy0o2( char *s1, char *s2, int n ) { while( n-- ) *s1++ = *s2++; *s1 = 0; }
+#endif
+
+// Copy seq into aseq, dropping every '=' character; aseq is NUL-terminated.
+static void eqpick( char *aseq, char *seq )
+{
+	char *out = aseq;
+	char ch;
+
+	while( ( ch = *seq++ ) != 0 )
+	{
+		if( ch == '=' ) continue;
+		*out++ = ch;
+	}
+	*out = 0;
+}
+
+// Align the group aln0 (n0 rows) against the group aln2 (n2 rows) in place.
+// alg == 'M' selects MSalignmm(); any other value selects A__align().
+// alloclen is forwarded unchanged to the alignment routine.
+// Sets the global newgapstr to "-" before aligning.
+void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg ) // n1 is all gaps
{
int i, newlen;
double *effarr0, *effarr2;
- float dumfl;
- double eff;
+ int *allgap0, *allgap2;
+ double dumdb;
+ int alcount0, alcount2;
+
+ if( aln0[0][1] == 0 && aln2[0][1] == 0 ) return; // when --allowshift is used...
+// reporterr( "profilealignment!\n" );
+
+ commongappick( n0, aln0 );
+ commongappick( n2, aln2 );
+
effarr0 = AllocateDoubleVec( n0 );
effarr2 = AllocateDoubleVec( n2 );
+ allgap0 = AllocateIntVec( n0 );
+ allgap2 = AllocateIntVec( n2 );
+
+#if 1 // new weight 2015/Jun
+// Flag rows reported as all-gap by isallgap() and count the remaining rows.
+ alcount0 = 0;
+ for( i=0; i<n0; i++ )
+ {
+ if( isallgap( aln0[i] ) ) allgap0[i] = 1;
+ else
+ {
+ alcount0++;
+ allgap0[i] = 0;
+ }
+ }
+
+ alcount2 = 0;
+ for( i=0; i<n2; i++ )
+ {
+ if( isallgap( aln2[i] ) ) allgap2[i] = 1;
+ else
+ {
+ alcount2++;
+ allgap2[i] = 0;
+ }
+ }
+
+// Equal weight for every non-all-gap row. Weights of all-gap rows are not
+// assigned here -- NOTE(review): presumably AllocateDoubleVec() returns zeroed
+// memory; confirm, otherwise those entries are uninitialized.
+ for( i=0; i<n0; i++ ) if( !allgap0[i] ) effarr0[i] = 1.0 / (double)( alcount0 );
+ for( i=0; i<n2; i++ ) if( !allgap2[i] ) effarr2[i] = 1.0 / (double)( alcount2 );
+#else
+ eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;
+ eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;
+#endif
+
+ newgapstr = "-";
+ if( alg == 'M' )
+ MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
+ else
+ A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1, -1, -1 ); //outgap=1, 2014/Dec/1
+
+// newlen is only read by the disabled block below.
+ newlen = strlen( aln0[0] );
+
+#if 0 // probably necessary
+ for( j=0; j<newlen; j++ )
+ {
+// fprintf( stderr, "j=%d\n", j );
+ for( i=0; i<n0; i++ )
+ {
+ if( aln0[i][j] != '-' ) break;
+ }
+ if( i == n0 )
+ {
+ for( i=0; i<n1; i++ )
+ {
+ if( aln1[i][j] != '-' ) break;
+ }
+ }
+ else i = -1;
+
+ if( i == n1 )
+ {
+ for( i=0; i<n1; i++ ) aln1[i][j] = '=';
+ }
+ }
+ fprintf( stderr, "in profilealignment,\n" );
+ for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = \n%s\n", i, aln0[i] );
+ for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = \n%s\n", i, aln1[i] );
+ for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = \n%s\n", i, aln2[i] );
+#endif
+
+ free( effarr0 );
+ free( effarr2 );
+ free( allgap0 );
+ free( allgap2 );
+}
+
+static void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg ) // n1 ha allgap
+{
+ int i, j, newlen;
+ double *effarr0 = NULL, *effarr2 = NULL;
+ int *allgap0 = NULL, *allgap2 = NULL;
+ double dumdb;
+ int alcount0, alcount2;
+ char *cptr;
+
+// effarr0 = AllocateDoubleVec( n0 );
+// effarr2 = AllocateDoubleVec( n2 );
+// allgap0 = AllocateIntVec( n0 );
+// allgap2 = AllocateIntVec( n2 );
+//
+ if( aln0[0][1] == 0 && aln2[0][1] == 0 ) return; // --allowshift no tokiha...
+
+// reporterr( "In profilealignment(), strlen( aln0[0] ) %d\n", strlen( aln0[0] ) );
+// reporterr( "In profilealignment(), strlen( aln2[0] ) %d\n", strlen( aln2[0] ) );
commongappick( n0, aln0 );
commongappick( n2, aln2 );
+// reporterr( "after commongappick, strlen( aln0[0] ) %d\n", strlen( aln0[0] ) );
+// reporterr( "after commongappick, strlen( aln2[0] ) %d\n", strlen( aln2[0] ) );
+
+// reporterr( "\n\n\n" );
+
+ if( aln2[0][0] == 0 )
+ {
+ newlen = j = strlen( aln0[0] );
+ cptr = aln2[0];
+ while( j-- ) *cptr++ = '-';
+ *cptr = 0;
+
+ cptr = aln2[0];
+ for( i=1; i<n2; i++ ) strcpy( aln2[i], cptr );
+ return;
+ }
+
+#if 1
+ effarr0 = (double *)malloc( n0 * sizeof( double ) );
+ effarr2 = (double *)malloc( n2 * sizeof( double ) );
+ allgap0 = (int *)malloc( n0 * sizeof( int ) );
+ allgap2 = (int *)malloc( n2 * sizeof( int ) );
+#else
+ effarr0 = (double *)calloc( n0, sizeof( double ) );
+ effarr2 = (double *)calloc( n2, sizeof( double ) );
+ allgap0 = (int *)calloc( n0, sizeof( int ) );
+ allgap2 = (int *)calloc( n2, sizeof( int ) );
+#endif
+
+#if 1 // new weight 2015/Jun
+ alcount0 = 0;
+ for( i=0; i<n0; i++ )
+ {
+ if( isallgap( aln0[i] ) ) allgap0[i] = 1;
+ else
+ {
+ alcount0++;
+ allgap0[i] = 0;
+ }
+ }
+
+ alcount2 = 0;
+ for( i=0; i<n2; i++ )
+ {
+ if( isallgap( aln2[i] ) ) allgap2[i] = 1;
+ else
+ {
+ alcount2++;
+ allgap2[i] = 0;
+ }
+ }
+ for( i=0; i<n0; i++ ) if( !allgap0[i] ) effarr0[i] = 1.0 / (double)( alcount0 ); else effarr0[i] = 0.0; // malloc / alloca no baai
+ for( i=0; i<n2; i++ ) if( !allgap2[i] ) effarr2[i] = 1.0 / (double)( alcount2 ); else effarr2[i] = 0.0; // malloc / alloca no baai
+#else
+ for( i=0; i<n0; i++ ) if( !allgap0[i] ) effarr0[i] = 1.0 / (double)( alcount0 ); // calloc no baai
+ for( i=0; i<n2; i++ ) if( !allgap2[i] ) effarr2[i] = 1.0 / (double)( alcount2 ); // calloc no baai
eff = 1.0 / (double)n0; for( i=0; i<n0; i++ ) effarr0[i] = eff;
eff = 1.0 / (double)n2; for( i=0; i<n2; i++ ) effarr2[i] = eff;
+#endif
+ newgapstr = "-";
if( alg == 'M' )
- MSalignmm( aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); //outgap=1??
+ MSalignmm( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap=1, 2014/Dec/1
else
- A__align( aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); //outgap=1??
+ A__align( n_dis_consweight_multi, aln0, aln2, effarr0, effarr2, n0, n2, alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1, -1, -1 ); //outgap=1, 2014/Dec/1
newlen = strlen( aln0[0] );
aln1[0][i] = 0;
for( i=1; i<n1; i++ ) strcpy( aln1[i], aln1[0] );
+ for( j=0; j<newlen; j++ )
+ {
+// fprintf( stderr, "j=%d\n", j );
+ for( i=0; i<n0; i++ )
+ {
+ if( aln0[i][j] != '-' ) break;
+ }
+ if( i == n0 )
+ {
+ for( i=0; i<n1; i++ )
+ {
+ if( aln1[i][j] != '-' ) break;
+ }
+ }
+ else i = -1;
+
+ if( i == n1 )
+ {
+ for( i=0; i<n1; i++ ) aln1[i][j] = '=';
+ }
+ }
+#if 0
+ fprintf( stderr, "in profilealignment, before commongappick\n" );
+ for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = %s\n", i, aln0[i] );
+ for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = %s\n", i, aln1[i] );
+ for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = %s\n", i, aln2[i] );
+#endif
+
+#if 0
+ fprintf( stderr, "in profilealignment, after commongappick\n" );
+ for( i=0; i<n0; i++ ) fprintf( stderr, "\n>aln0[%d] = %s\n", i, aln0[i] );
+ for( i=0; i<n1; i++ ) fprintf( stderr, "\n>aln1[%d] = %s\n", i, aln1[i] );
+ for( i=0; i<n2; i++ ) fprintf( stderr, "\n>aln2[%d] = %s\n", i, aln2[i] );
+#endif
+
free( effarr0 );
free( effarr2 );
+ free( allgap0 );
+ free( allgap2 );
+}
+
+// For every column in which the first row s[0] holds '=', overwrite that
+// column with '-' in all n rows.
+void eq2dashmatomete( char **s, int n )
+{
+	int row, col;
+
+	for( col=0; s[0][col]; col++ )
+	{
+		if( s[0][col] != '=' ) continue;
+		for( row=0; row<n; row++ ) s[row][col] = '-';
+	}
+}
+
+// Faster variant of eq2dashmatomete(): the columns where s[0] holds '=' are
+// collected once, then those columns are set to '-' in each of the n rows.
+// All rows are assumed to be at least as long as s[0].
+void eq2dashmatometehayaku( char **s, int n )
+{
+	int i, j, c;
+	int *tobechanged;
+	int len = strlen( s[0] );
+
+	// len+1 entries: up to len column indices plus the -1 terminator.
+	// With only len entries the terminator was written past the end of the
+	// buffer whenever every column of s[0] was '=' (or when len was 0).
+	tobechanged = calloc( len+1, sizeof( int ) );
+	if( tobechanged == NULL )
+	{
+		fprintf( stderr, "Cannot allocate memory in eq2dashmatometehayaku()\n" );
+		exit( 1 );
+	}
+
+	c = 0;
+	for( j=0; j<len; j++ )
+	{
+		if( s[0][j] == '=' ) tobechanged[c++] = j;
+	}
+	tobechanged[c] = -1;
+
+	for( i=0; i<n; i++ )
+	{
+		for( c=0; (j=tobechanged[c])!=-1; c++ )
+			s[i][j] = '-';
+	}
+	free( tobechanged );
}
+// Replace every '=' in the NUL-terminated string s with '-', in place.
void eq2dash( char *s )
{
while( *s )
{
- if( *s == '=' ) *s = '-';
+ if( *s == '=' )
+ {
+ *s = '-';
+ }
s++;
}
}
-void findnewgaps( int n, char **seq, int *gaplen )
+// Replace every '+' in the NUL-terminated string s with gapchar, in place.
+static void plus2gapchar( char *s, char gapchar )
{
- int i, pos, len;
- len = strlen( seq[0] );
+ while( *s )
+ {
+ if( *s == '+' )
+ {
+ *s = gapchar;
+ }
+ s++;
+ }
+}
-// fprintf( stderr, "seq[0] = %s\n", seq[0] );
- for( i=0; i<len; i++ ) gaplen[i] = 0;
+// Count the '=' characters of seq[rep]: gaplen[pos] receives the number of
+// '=' characters found while pos non-'=' characters have been passed.
+// The scan length is strlen( seq[0] ); gaplen[0..len] is cleared first, so
+// gaplen must hold at least len+1 entries. n is not used in this function.
+// When the global disp is non-zero, each hit is reported on stderr.
+void findnewgaps( int n, int rep, char **seq, int *gaplen )
+{
+ int i, pos, len, len1;
+
+ len = strlen( seq[0] );
+// for( i=0; i<len; i++ ) gaplen[i] = 0; // not really needed, since calloc already initializes it
+ len1 = len + 1;
+ for( i=0; i<len1; i++ ) gaplen[i] = 0; // needed when the buffer comes from realloc (presumably not zeroed -- TODO confirm with caller)
pos = 0;
for( i=0; i<len; i++ )
{
- if( seq[0][i] == '=' )
+ if( seq[rep][i] == '=' )
{
-// fprintf( stderr, "Newgap! pos = %d\n", pos );
+ if( disp ) fprintf( stderr, "Newgap! pos = %d\n", pos );
gaplen[pos]++;
}
else
pos++;
}
+
+#if 0
+ if( disp )
+ {
+ fprintf( stderr, "\ngaplen[] in findnewgaps() = \n" );
+ for(i=0; i<pos; i++ ) fprintf( stderr, "%d ", gaplen[i] );
+ fprintf( stderr, "\n" );
+ reporterr( "pos=%d\n", pos );
+ }
+#endif
}
-void findcommongaps( int n, char **seq, int *gaplen )
+void findcommongaps( int n, char **seq, int *gapmap )
{
int i, j, pos, len, len1;
len = strlen( seq[0] );
len1 = len+1;
// fprintf( stderr, "seq[0] = %s\n", seq[0] );
- for( i=0; i<len1; i++ ) gaplen[i] = 0;
+ for( i=0; i<len1; i++ ) gapmap[i] = 0;
pos = 0;
for( i=0; i<len; i++ )
for( j=0; j<n; j++ )
if( seq[j][i] != '-' ) break;
- if( j == n ) gaplen[pos]++;
+ if( j == n ) gapmap[pos]++;
else
pos++;
}
#if 0
for( i=0; i<pos; i++ )
{
- fprintf( stderr, "vec[%d] = %d\n", i, gaplen[i] );
+ fprintf( stderr, "vec[%d] = %d\n", i, gapmap[i] );
}
#endif
}
int newlen1 = newlen+1;
int *tmpmap;
+
tmpmap = AllocateIntVec( newlen+2 );
j = 0;
pos = 0;
gapmap[j] = tmpmap[j];
free( tmpmap );
+
+#if 0
+ reporterr( "gapmap in adjustgapmap() =\n" );
+ for(j=0; j<newlen1; j++) reporterr( "%d ", gapmap[j] );
+ reporterr( "length = %d\n", newlen );
+#endif
+}
+
+
+// Length of the leading run of zero entries in [gaplen, term): scanning stops
+// at the first non-zero entry or at term, whichever comes first.
+static int countnogaplen( int *gaplen, int *term )
+{
+	int *p;
+
+	for( p=gaplen; p<term; p++ )
+		if( *p != 0 ) break;
+	return( (int)( p - gaplen ) );
+}
+
+// Number of consecutive zero entries at the start of [gapmap, term); the
+// count ends at the first non-zero entry or when term is reached.
+static int countgapmap( int *gapmap, int *term )
+{
+	int v;
+
+	for( v=0; gapmap+v < term && gapmap[v] == 0; v++ )
+		;
+	return( v );
}
-void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg )
+void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar )
{
int *mar;
char *gaps;
char *cptr;
- int i, j, k, len, rep, len0;
+ int i, j, k, len, rep, len0, lp, blocklen;
char **mseq2, **mseq0, **mseq1;
- char **aseq;
+ char **aseq, *newchar;
int ngroup2, ngroup0, ngroup1;
int *list0, *list1, *list2;
int posin12, gapshift, newpos;
+ int mlen1, mlen0, mlen2;
+
mar = calloc( njob, sizeof( int ) );
list0 = calloc( njob, sizeof( int ) );
if( mar[i] == 0 )
{
list0[ngroup0] = i;
+// fprintf( stderr, "inserting new gaps to %d\n", i );
ngroup0++;
}
}
list0[ngroup0] = list1[ngroup1] = list2[ngroup2] = -1;
if( ngroup0 == 0 )
{
- fprintf( stderr, "Nothing to do\n" );
+// fprintf( stderr, "Nothing to do\n" );
free( mar );
free( list0 );
free( list1 );
gaps = calloc( alloclen, sizeof( char ) );
for( i=0; i<njob; i++ ) aseq[i][0] = 0;
+ newpos = 0;
posin12 = 0;
+#if 0
+ fprintf( stderr, "\ngaplen[] = \n" );
+ for(i=0; i<len0; i++ ) fprintf( stderr, "%d", gaplen[i] );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "\ngapmap[] = \n" );
+ for(i=0; i<len0; i++ ) fprintf( stderr, "%d", gapmap[i] );
+ fprintf( stderr, "\n" );
+#endif
+
for( j=0; j<len0; j++ )
{
+// fprintf( stderr, "\nj=%d, gaplen[%d]=%d\n", j, j, gaplen[j] );
if( gaplen[j] )
{
+// fprintf( stderr, "j=%d GAP!\n", j );
for( i=0; i<ngroup0; i++ ) mseq0[i][0] = 0;
for( i=0; i<ngroup1; i++ ) mseq1[i][0] = 0;
for( i=0; i<ngroup2; i++ ) mseq2[i][0] = 0;
+ mlen0 = mlen1 = mlen2 = 0;
gapshift = gaplen[j];
cptr = gaps;
- while( gapshift-- ) *cptr++ = '-';
+ while( gapshift-- ) *cptr++ = gapchar;
*cptr = 0;
gapshift = gaplen[j];
- for( i=0; i<ngroup0; i++ ) strncat( mseq0[i], gaps, gapshift );
- for( i=0; i<ngroup1; i++ ) strncat( mseq1[i], seq[list1[i]]+posin12, gapshift );
- for( i=0; i<ngroup2; i++ ) strncat( mseq2[i], seq[list2[i]]+posin12, gapshift );
+ for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, gaps, gapshift );
+ for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift );
+ for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift );
posin12 += gapshift;
+ mlen0 += gapshift;
+ mlen1 += gapshift;
+ mlen2 += gapshift;
gapshift = gapmap[posin12];
// fprintf( stderr, "gapmap[%d] kouho = %d\n", posin12, gapmap[posin12] );
- for( i=0; i<ngroup0; i++ ) strncat( mseq0[i], seq[list0[i]]+j, gapshift );
- for( i=0; i<ngroup1; i++ ) strncat( mseq1[i], seq[list1[i]]+posin12, gapshift );
- for( i=0; i<ngroup2; i++ ) strncat( mseq2[i], seq[list2[i]]+posin12, gapshift );
+ for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshift );
+ for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshift );
+ for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshift );
+ mlen0 += gapshift;
+ mlen1 += gapshift;
+ mlen2 += gapshift;
#if 0
for( i=0; i<ngroup0; i++ ) fprintf( stderr, "### mseq0[%d] = %s\n", i, mseq0[i] );
for( i=0; i<ngroup1; i++ ) fprintf( stderr, "### mseq1[%d] = %s\n", i, mseq1[i] );
for( i=0; i<ngroup2; i++ ) fprintf( stderr, "### mseq2[%d] = %s\n", i, mseq2[i] );
#endif
- if( gapshift ) profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg );
+ if( gapshift )
+ {
+// reporterr( "profilealignment (j=%d)!!!\n", j );
+
+ profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg );
+ }
j += gapshift;
posin12 += gapshift;
- for( i=0; i<ngroup0; i++ ) strcat( aseq[list0[i]], mseq0[i] );
- for( i=0; i<ngroup1; i++ ) strcat( aseq[list1[i]], mseq1[i] );
- for( i=0; i<ngroup2; i++ ) strcat( aseq[list2[i]], mseq2[i] );
+ newpos = strlen( aseq[rep] ); // kufuu?
+ for( i=0; i<ngroup0; i++ ) strcpy( aseq[list0[i]]+newpos, mseq0[i] );
+ for( i=0; i<ngroup1; i++ ) strcpy( aseq[list1[i]]+newpos, mseq1[i] );
+ for( i=0; i<ngroup2; i++ ) strcpy( aseq[list2[i]]+newpos, mseq2[i] );
+
+// fprintf( stderr, "gapshift = %d\n", gapshift );
}
+ blocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 );
+// fprintf( stderr, "\nj=%d, blocklen=%d, len0=%d\n", j, blocklen, len0 );
+// blocklen = 1;
newpos = strlen( aseq[rep] );
+
+#if 0
for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j];
for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12];
for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12];
for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0;
for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0;
for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0;
+#else
+
+ for( i=0; i<ngroup0; i++ )
+ {
+ lp = list0[i];
+ newchar = aseq[lp] + newpos;
+ strncpy0( newchar, seq[lp]+j, blocklen );
+ }
+ for( i=0; i<ngroup1; i++ )
+ {
+ lp = list1[i];
+ newchar = aseq[lp] + newpos;
+ strncpy0( newchar, seq[lp]+posin12, blocklen );
+ }
+ for( i=0; i<ngroup2; i++ )
+ {
+ lp = list2[i];
+ newchar = aseq[lp] + newpos;
+ strncpy0( newchar, seq[lp]+posin12, blocklen );
+ }
+// fprintf( stderr, "### aseq[l0] = %s\n", aseq[list0[0]] );
+// fprintf( stderr, "### aseq[l1] = %s\n", aseq[list1[0]] );
+// fprintf( stderr, "### aseq[l2] = %s\n", aseq[list2[0]] );
+#endif
+
+// fprintf( stderr, "j=%d -> %d\n", j, j+blocklen-1 );
+ j += (blocklen-1);
+
+
+ posin12 += (blocklen-1);
+
posin12++;
}
+#if 0
+ fprintf( stderr, "\n" );
+ for( i=0; i<ngroup0; i++ ) fprintf( stderr, " seq[l0i] = %s\n", seq[list0[i]] );
+ for( i=0; i<ngroup1; i++ ) fprintf( stderr, " seq[l1i] = %s\n", seq[list1[i]] );
+ for( i=0; i<ngroup2; i++ ) fprintf( stderr, " seq[l2i] = %s\n", seq[list2[i]] );
+ fprintf( stderr, "=====>\n" );
+ for( i=0; i<ngroup0; i++ ) fprintf( stderr, "aseq[l0i] = %s\n", aseq[list0[i]] );
+ for( i=0; i<ngroup1; i++ ) fprintf( stderr, "aseq[l1i] = %s\n", aseq[list1[i]] );
+ for( i=0; i<ngroup2; i++ ) fprintf( stderr, "aseq[l2i] = %s\n", aseq[list2[i]] );
+#endif
// for( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] );
for( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] );
for( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] );
for( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] );
+
free( mar );
free( gaps );
free( list0 );
free( list1 );
free( list2 );
FreeCharMtx( mseq2 );
+ FreeCharMtx( mseq1 ); // ? added 2012/02/12
FreeCharMtx( mseq0 );
+ FreeCharMtx( aseq ); // ? added 2012/02/12
}
-
-void restorecommongaps( int njob, char **seq, int *ex1, int *ex2, int *gaplen, int alloclen )
+void insertnewgaps_bothorders( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int gapmaplen, int alloclen, char alg, char gapchar )
{
+// int disp = 0;
int *mar;
- char *tmpseq;
+ char *gaps;
char *cptr;
- int *iptr;
- int *tmpgaplen;
- int i, j, k, len, rep, len1;
+ int i, j, k, len, rep, len0, lp, blocklen, blockmap;
+ char **mseq2, **mseq0, **mseq1;
+ char **aseq, *newchar;
+ int ngroup2, ngroup0, ngroup1;
+ int *list0, *list1, *list2;
+ int posin12, gapshifta, gapshiftn, gapshiftb, newpos;
+ int mlen1, mlen0, mlen2;
+ int jinc;
- mar = calloc( njob, sizeof( int ) );
- tmpseq = calloc( alloclen, sizeof( char ) );
- tmpgaplen = calloc( alloclen, sizeof( int ) );
-// tmpseq = calloc( alloclen+2, sizeof( char ) );
-// tmpgaplen = calloc( alloclen+2, sizeof( int ) );
+#if 0 // for debug
+ int bug = 0;
+ char *nogapseq1, *nogapseq2;
+ nogapseq1 = calloc( alloclen, sizeof( char ) );
+ nogapseq2 = calloc( alloclen, sizeof( char ) );
+#endif // for debug
+ mar = calloc( njob, sizeof( int ) );
+ list0 = calloc( njob, sizeof( int ) );
+ list1 = calloc( njob, sizeof( int ) );
+ list2 = calloc( njob, sizeof( int ) );
for( i=0; i<njob; i++ ) mar[i] = 0;
+ for( i=0; i<njob; i++ )
+ {
+ if( alreadyaligned[i]==0 ) mar[i] = 3;
+ }
for( i=0; (k=ex1[i])>-1; i++ )
{
mar[k] = 1;
}
for( i=0; (k=ex2[i])>-1; i++ )
{
- mar[k] = 1;
+ mar[k] = 2;
// fprintf( stderr, "excluding %d\n", ex2[i] );
}
+ ngroup2 = ngroup1 = ngroup0 = 0;
for( i=0; i<njob; i++ )
- if( mar[i] ) break;
-
- if( i == njob )
+ {
+ if( mar[i] == 2 )
+ {
+ list2[ngroup2] = i;
+ ngroup2++;
+ }
+ if( mar[i] == 1 )
+ {
+ list1[ngroup1] = i;
+ ngroup1++;
+ }
+ if( mar[i] == 0 )
+ {
+ list0[ngroup0] = i;
+// fprintf( stderr, "inserting new gaps to %d\n", i );
+ ngroup0++;
+ }
+ }
+ list0[ngroup0] = list1[ngroup1] = list2[ngroup2] = -1;
+ if( ngroup0 == 0 )
{
// fprintf( stderr, "Nothing to do\n" );
free( mar );
- free( tmpseq );
- free( tmpgaplen );
+ free( list0 );
+ free( list1 );
+ free( list2 );
return;
}
+
+ for( i=0; i<njob; i++ ) if( mar[i] == 0 ) break;
rep = i;
len = strlen( seq[rep] );
- len1 = len+1;
+ len0 = len+1;
+// reporterr( "alloclen = %d\n", alloclen );
+// reporterr( "len0 = %d\n", strlen( seq[list0[0]] ) );
+// reporterr( "len1 = %d\n", strlen( seq[list1[0]] ) );
+// reporterr( "gapmaplen = %d\n", gapmaplen );
+// reporterr( "ng0, ng1, ng2 = %d, %d, %d\n", ngroup0, ngroup1, ngroup2 );
- for( i=0; i<njob; i++ )
+//
+// if( i == njob )
+// {
+//// fprintf( stderr, "Nothing to do\n" );
+// free( mar );
+// return;
+// }
+
+ mseq2 = AllocateCharMtx( ngroup2, alloclen );
+ mseq1 = AllocateCharMtx( ngroup1, alloclen );
+ mseq0 = AllocateCharMtx( ngroup0, alloclen );
+ aseq = AllocateCharMtx( njob, alloclen );
+ gaps = calloc( alloclen, sizeof( char ) );
+
+ for( i=0; i<njob; i++ ) aseq[i][0] = 0;
+ newpos = 0;
+ posin12 = 0;
+
+#if 0
+ if( disp )
{
- if( mar[i] == 0 ) continue;
- cptr = tmpseq;
- for( j=0; j<len1; j++ )
+ int p;
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "len0 = %d\n", len0 );
+ fprintf( stderr, "\ngaplen[] = \n" );
+ reporterr( "seq0[0] = %s\n", seq[list0[0]] );
+ reporterr( "seq1[1] = %s\n", seq[list1[0]] );
+ reporterr( "seq2[2] = %s\n", seq[list2[0]] );
+ reporterr( "seq[rep] = %s\n", seq[rep] );
+ for(i=0,p=0; i<gapmaplen; i++,p++ )
{
- for( k=0; k<gaplen[j]; k++ )
- *(cptr++) = '-';
- *(cptr++) = seq[i][j];
+ fprintf( stderr, "gaplen %d:%d %-*.*s\n", p, gaplen[p], gaplen[p]+1, gaplen[p]+1, seq[list1[0]]+i );
+ i += gaplen[p];
}
- *cptr = 0;
- strcpy( seq[i], tmpseq );
}
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "\ngapmap[] = \n" );
+ for(i=0; i<gapmaplen; i++ ) fprintf( stderr, "gapmap %d:%d %-*.*s\n", i, gapmap[i], gapmap[i]+1, gapmap[i]+1, seq[list1[0]]+i );
- iptr = tmpgaplen;
- for( j=0; j<len1; j++ )
- {
- *(iptr++) = gaplen[j];
- for( k=0; k<gaplen[j]; k++ )
- *(iptr++) = 0;
- }
- *iptr = -1;
+ reporterr( "seq1 = \n" );
+ reporterr( "%s\n", seq[list1[0]] );
+#endif
+
+
+
+ for( j=0; j<len0; j++ )
+ {
+// fprintf( stderr, "\nj=%d, gaplen[%d]=%d\n", j, j, gaplen[j] );
+// if( gaplen[j] || gapmap[posin12] )
+ if( gaplen[j] || gapmap[posin12] )
+ {
+// fprintf( stderr, "j=%d GAP!\n", j );
+ for( i=0; i<ngroup0; i++ ) mseq0[i][0] = 0;
+ for( i=0; i<ngroup1; i++ ) mseq1[i][0] = 0;
+ for( i=0; i<ngroup2; i++ ) mseq2[i][0] = 0;
+ mlen0 = mlen1 = mlen2 = 0;
+
+
+ gapshiftb = gapmap[posin12];
+// fprintf( stderr, "\ngapmap[%d] kouho = %d, posint12 = %d\n", posin12, gapmap[posin12], posin12 );
+
+ if( gapshiftb ) // koko ga osoi!
+ {
+ for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshiftb ); // tokuni koko!
+ for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshiftb );
+ for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshiftb );
+ mlen0 += gapshiftb;
+ mlen1 += gapshiftb;
+ mlen2 += gapshiftb;
+#if 0
+ reporterr( "\n\n" );
+ for( i=0; i<1; i++ ) fprintf( stderr, "##b mseq0[%d] = %s\n", i, mseq0[i] );
+ for( i=0; i<1; i++ ) fprintf( stderr, "##b mseq1[%d] = %s\n", i, mseq1[i] );
+ for( i=0; i<1; i++ ) fprintf( stderr, "##b mseq2[%d] = %s\n", i, mseq2[i] );
+#endif
+ posin12 += gapshiftb;
+ }
+#if 0 // nen no tame
+ for( i=0, jinc=0; i<gapshiftb; i++ ) jinc += 1+gaplen[j+i];
+ if( jinc != gapshiftb )
+ {
+ reporterr( "\n#################!\n" );
+ reporterr( "# Unexpected gap pattern!\n" );
+ reporterr( "# There are overlapped %d gaps in gaplen[] and gapmap[]. j=%d-%d, posin12=%d-%d\n", jinc, j-gapshiftb-jinc, j, posin12-gapshiftb, posin12 );
+ reporterr( "\n#################!\n" );
+ exit( 1 );
+ }
+ j += gapshiftb;
+#else
+ j += gapshiftb;
+#endif
+
+#if 0
+ if( disp && gapshiftb )
+ {
+ reporterr( "after gapshiftb, j=%d, posin12=%d\n", j, posin12 );
+ reporterr( "mseq0[0] = %s\n", mseq0[0] );
+ reporterr( "mseq1[0] = %s\n", mseq1[0] );
+ reporterr( "mseq2[0] = %s\n", mseq2[0] );
+ }
+#endif
+
+// fprintf( stderr, "gaplen[%d]=%d, posin12 = %d\n", j, gaplen[j], posin12 );
+
+ while( 1 )
+ {
+ gapshiftn = gaplen[j];
+ if( gapshiftn )
+ {
+ cptr = gaps;
+ while( gapshiftn-- ) *cptr++ = gapchar;
+ *cptr = 0;
+ gapshiftn = gaplen[j];
+
+ for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, gaps, gapshiftn );
+ for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshiftn );
+ for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshiftn );
+ posin12 += gapshiftn;
+ mlen0 += gapshiftn;
+ mlen1 += gapshiftn;
+ mlen2 += gapshiftn;
+#if 0
+ for( i=0; i<1; i++ ) fprintf( stderr, "##n mseq0[%d] = %s\n", i, mseq0[i] );
+ for( i=0; i<1; i++ ) fprintf( stderr, "##n mseq1[%d] = %s\n", i, mseq1[i] );
+ for( i=0; i<1; i++ ) fprintf( stderr, "##n mseq2[%d] = %s\n", i, mseq2[i] );
+#endif
+ }
+
+#if 0
+ if( disp && gapshiftn )
+ {
+ reporterr( "after gapshiftn (j=%d, gaplen[j]=%d, posin12=%d, gapshiftn=%d)\n", j, gaplen[j], posin12-gapshiftn, gapshiftn );
+ reporterr( "mseq0[0] = %s\n", mseq0[0] );
+ reporterr( "mseq1[0] = %s\n", mseq1[0] );
+ reporterr( "mseq2[0] = %s\n", mseq2[0] );
+ }
+#endif
+
+
+ gapshifta = gapmap[posin12];
+// fprintf( stderr, "gapmap[%d] kouho = %d, posin12 = %d\n", posin12, gapmap[posin12], posin12 );
+
+ if( gapshifta )
+ {
+ for( i=0; i<ngroup0; i++ ) strncpy0( mseq0[i]+mlen0, seq[list0[i]]+j, gapshifta );
+ for( i=0; i<ngroup1; i++ ) strncpy0( mseq1[i]+mlen1, seq[list1[i]]+posin12, gapshifta );
+ for( i=0; i<ngroup2; i++ ) strncpy0( mseq2[i]+mlen2, seq[list2[i]]+posin12, gapshifta );
+ mlen0 += gapshifta;
+ mlen1 += gapshifta;
+ mlen2 += gapshifta;
+ posin12 += gapshifta;
+
+#if 0
+ for( i=0; i<1; i++ ) fprintf( stderr, "##a mseq0[%d] = %s\n", i, mseq0[i] );
+ for( i=0; i<1; i++ ) fprintf( stderr, "##a mseq1[%d] = %s\n", i, mseq1[i] );
+ for( i=0; i<1; i++ ) fprintf( stderr, "##a mseq2[%d] = %s\n", i, mseq2[i] );
+#endif
+ }
+#if 0
+ j += gapshifta; // BUG!!
+#else
+// for( i=1, jinc=1; i<gapshifta; i++ ) jinc += 1+gaplen[j+i];
+ for( i=1, jinc=0; i<=gapshifta; i++ ) jinc += 1+gaplen[j+i];
+// j += jinc;
+ j += gapshifta;
+
+ if( jinc == gapshifta ) break;
+
+ reporterr( "(a) There are overlapped %d gaps in gaplist[] and gapmap[]. j=%d-%d, posin12=%d-%d, jinc=%d, gapshifta=%d\n", jinc, j-gapshifta-jinc, j, posin12-gapshifta, posin12, jinc, gapshifta );
+#endif
+ }
- iptr = tmpgaplen;
- while( *iptr != -1 ) *gaplen++ = *iptr++;
+#if 0
+ if( disp && gapshifta )
+ {
+ reporterr( "after gapshifta, j=%d, posin12=%d\n", j, posin12 );
+ reporterr( "mseq0[0] = %s\n", mseq0[0] );
+ reporterr( "mseq1[0] = %s\n", mseq1[0] );
+ reporterr( "mseq2[0] = %s\n", mseq2[0] );
+ }
+#endif
+
+
+ if( gapshiftb + gapshifta )
+ {
+#if 0
+ for( i=0; i<1; i++ ) fprintf( stderr, "### mseq0[%d] = %s\n", i, mseq0[i] );
+ for( i=0; i<1; i++ ) fprintf( stderr, "### mseq1[%d] = %s\n", i, mseq1[i] );
+ for( i=0; i<1; i++ ) fprintf( stderr, "### mseq2[%d] = %s\n", i, mseq2[i] );
+#endif
+// if( disp ) reporterr( "profilealignment (j=%d)!!!\n", j );
+
+ profilealignment( ngroup0, ngroup1, ngroup2, mseq0, mseq1, mseq2, alloclen, alg );
+ }
+
+
+ newpos = strlen( aseq[rep] ); // kufuu?
+ for( i=0; i<ngroup0; i++ ) strcpy( aseq[list0[i]]+newpos, mseq0[i] );
+ for( i=0; i<ngroup1; i++ ) strcpy( aseq[list1[i]]+newpos, mseq1[i] );
+ for( i=0; i<ngroup2; i++ ) strcpy( aseq[list2[i]]+newpos, mseq2[i] );
+
+#if 0
+ if( disp )
+ {
+ reporterr( "after profilealignment\n" );
+ reporterr( "mseq0[0] = %s\n", mseq0[0] );
+ reporterr( "mseq1[0] = %s\n", mseq1[0] );
+ reporterr( "mseq2[0] = %s\n", mseq2[0] );
+
+ gappick0equalminustmptmptmp( nogapseq1, aseq[list1[0]] );
+ gappick0equalminustmptmptmp( nogapseq2, seq[list1[0]] );
+
+ reporterr( "aseq[list1[0].nogap = %s\n", nogapseq1 );
+ reporterr( " seq[list1[0].nogap = %s\n", nogapseq2 );
+ }
+#endif
+
+// fprintf( stderr, "gapshift = %d\n", gapshift );
+ }
+ newpos = strlen( aseq[rep] );
+ blocklen = 1 + countnogaplen( gaplen+j+1, gaplen+len0 );
+// fprintf( stderr, "\nj=%d, blocklen=%d, len0=%d\n", j, blocklen, len0 );
+
+ blockmap = 1 + countgapmap( gapmap+posin12+1, gapmap+gapmaplen );
+// fprintf( stderr, "posin12=%d, blockmap=%d, len0=%d\n", posin12, blockmap, len0 );
+
+// if( disp ) reporterr( "newpos = %d, blocklen = %d, blockmap = %d, j=%d, posin12=%d\n", newpos, blocklen, blockmap, j, posin12 );
+
+ if( blockmap < blocklen ) blocklen = blockmap;
+
+
+
+
+#if 0
+ for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos] = seq[list0[i]][j];
+ for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos] = seq[list1[i]][posin12];
+ for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos] = seq[list2[i]][posin12];
+ for( i=0; i<ngroup0; i++ ) aseq[list0[i]][newpos+1] = 0;
+ for( i=0; i<ngroup1; i++ ) aseq[list1[i]][newpos+1] = 0;
+ for( i=0; i<ngroup2; i++ ) aseq[list2[i]][newpos+1] = 0;
+#else
+
+// if( j >= len0 ) break; // iru?
+
+
+ for( i=0; i<ngroup0; i++ )
+ {
+ lp = list0[i];
+ newchar = aseq[lp] + newpos;
+ strncpy0( newchar, seq[lp]+j, blocklen );
+// *(newchar+blocklen) = 0; iranai
+ }
+ for( i=0; i<ngroup1; i++ )
+ {
+ lp = list1[i];
+ newchar = aseq[lp] + newpos;
+ strncpy0( newchar, seq[lp]+posin12, blocklen );
+// *(newchar+blocklen) = 0; iranai
+ }
+ for( i=0; i<ngroup2; i++ )
+ {
+ lp = list2[i];
+ newchar = aseq[lp] + newpos;
+ strncpy0( newchar, seq[lp]+posin12, blocklen );
+// *(newchar+blocklen) = 0; iranai
+ }
+
+
+// reporterr( "adding %c to aseq[list1[0]]\n", seq[list1[0]][posin12] );
+
+
+// for( i=0; i<ngroup0; i++ ) fprintf( stderr, "### aseq0[%d] = %s\n", i, aseq[list0[i]] );
+// for( i=0; i<ngroup1; i++ ) fprintf( stderr, "### aseq1[%d] = %s\n", i, aseq[list1[i]] );
+// for( i=0; i<ngroup2; i++ ) fprintf( stderr, "### aseq2[%d] = %s\n", i, aseq[list2[i]] );
+#endif
+
+// fprintf( stderr, "j=%d -> %d\n", j, j+blocklen-1 );
+
+ j += (blocklen-1);
+// j += gaplen[j];
+
+
+ posin12 += (blocklen-1); // sono aida ni gapmap wo miotosu?
+
+
+ posin12++;
+
+#if 0
+ if( disp )
+ {
+ gappick0equalminustmptmptmp( nogapseq1, aseq[list1[0]] );
+ gappick0equalminustmptmptmp( nogapseq2, seq[list1[0]] );
+
+ reporterr( "aseq[list1[0].nogap = %s\n", nogapseq1 );
+ reporterr( " seq[list1[0].nogap = %s\n", nogapseq2 );
+ reporterr( "" );
+// reporterr( "seq[list1[0]] = %s\n", seq[list1[0]] );
+// reporterr( "seq[list2[0]] = %s\n", seq[list2[0]] );
+ }
+#endif
+ }
+#if 0
+ fprintf( stderr, "\n" );
+ for( i=0; i<ngroup0; i++ ) fprintf( stderr, " seq[l0i] = \n%s\n", seq[list0[i]] );
+ for( i=0; i<ngroup1; i++ ) fprintf( stderr, " seq[l1i] = \n%s\n", seq[list1[i]] );
+ for( i=0; i<ngroup2; i++ ) fprintf( stderr, " seq[l2i] = \n%s\n", seq[list2[i]] );
+ reporterr( "0 1 2 3 4 5 6 7 \n" );
+ reporterr( "012345678901234567890123456789012345678901234567890123456789012345678901234567\n" );
+ fprintf( stderr, "=====>\n" );
+ for( i=0; i<ngroup0; i++ ) fprintf( stderr, "aseq[l0i] = \n%s\n", aseq[list0[i]] );
+ for( i=0; i<ngroup1; i++ ) fprintf( stderr, "aseq[l1i] = \n%s\n", aseq[list1[i]] );
+ for( i=0; i<ngroup2; i++ ) fprintf( stderr, "aseq[l2i] = \n%s\n", aseq[list2[i]] );
+#endif
+
+#if 0
+ reporterr( "list0[0]=%d\n", list0[0] );
+ reporterr( "list0[%d-1]=%d\n", ngroup0, list0[ngroup0-1] );
+ reporterr( "seq0[list[0]]=%s\n", seq[list0[0]] );
+ reporterr( "aseq0[list[0]]=%s\n", aseq[list0[0]] );
+#endif
+
+
+
+#if 0
+ for( i=0; i<ngroup0; i++ )
+ {
+ if( strlen( aseq[list0[0]] ) != strlen( aseq[list0[i]] ) )
+ {
+ reporterr( "Length error! len[0] = %d, but len[%d] = %d\n", strlen( aseq[list0[0]] ), list0[i], strlen( aseq[list0[i]] ) );
+ bug = 1;
+ break;
+ }
+ }
+ for( i=0; i<ngroup0; i++ )
+ {
+ gappick0equalminustmptmptmp( nogapseq1, aseq[list0[i]] );
+ gappick0equalminustmptmptmp( nogapseq2, seq[list0[i]] );
+ if( strcmp( nogapseq1, nogapseq2 ) ) bug = 1;
+ }
+ for( i=0; i<ngroup1; i++ )
+ {
+ gappick0equalminustmptmptmp( nogapseq1, aseq[list1[i]] );
+ gappick0equalminustmptmptmp( nogapseq2, seq[list1[i]] );
+ if( strcmp( nogapseq1, nogapseq2 ) ) bug = 1;
+ }
+ for( i=0; i<ngroup2; i++ )
+ {
+ gappick0equalminustmptmptmp( nogapseq1, aseq[list2[i]] );
+ gappick0equalminustmptmptmp( nogapseq2, seq[list2[i]] );
+ if( strcmp( nogapseq1, nogapseq2 ) ) bug = 1;
+ }
+
+ free( nogapseq1 );
+ free( nogapseq2 );
+
+ if( bug )
+ {
+ reporterr( "ERROR!!!!!!!\n" );
+ reporterr( ">aseq1[%d], len = %d\n%s\n", list1[0], strlen( aseq[list1[0]] ), aseq[list0[0]] );
+ reporterr( ">seq1[%d], len = %d\n%s\n", list1[0], strlen( seq[list1[0]] ), seq[list0[i]] );
+ exit( 1 );
+
+
+ for( i=0; i<ngroup0; i++ ) reporterr( ">aseq0[%d], len = %d\n%s\n", list0[i], strlen( aseq[list0[i]] ), aseq[list0[i]] );
+ for( i=0; i<ngroup1; i++ ) reporterr( ">aseq1[%d], len = %d\n%s\n", list1[i], strlen( aseq[list1[i]] ), aseq[list1[i]] );
+ for( i=0; i<ngroup2; i++ ) reporterr( ">aseq2[%d], len = %d\n%s\n", list2[i], strlen( aseq[list2[i]] ), aseq[list2[i]] );
+
+ for( i=0; i<ngroup0; i++ ) reporterr( ">seq0[%d], len = %d\n%s\n", list0[i], strlen( seq[list0[i]] ), seq[list0[i]] );
+ for( i=0; i<ngroup1; i++ ) reporterr( ">seq1[%d], len = %d\n%s\n", list1[i], strlen( seq[list1[i]] ), seq[list1[i]] );
+ for( i=0; i<ngroup2; i++ ) reporterr( ">seq2[%d], len = %d\n%s\n", list2[i], strlen( seq[list2[i]] ), seq[list2[i]] );
+ exit( 1 );
+ }
+
+#endif
+
+// for( i=0; i<njob; i++ ) if( mar[i] != 3 ) strcpy( seq[i], aseq[i] );
+ for( i=0; i<ngroup0; i++ ) strcpy( seq[list0[i]], aseq[list0[i]] );
+ for( i=0; i<ngroup1; i++ ) strcpy( seq[list1[i]], aseq[list1[i]] );
+ for( i=0; i<ngroup2; i++ ) strcpy( seq[list2[i]], aseq[list2[i]] );
free( mar );
+ free( gaps );
+ free( list0 );
+ free( list1 );
+ free( list2 );
+ FreeCharMtx( mseq2 );
+ FreeCharMtx( mseq1 ); // ? added 2012/02/12
+ FreeCharMtx( mseq0 );
+ FreeCharMtx( aseq ); // ? added 2012/02/12
+
+}
+
+
+static void reflectsmoothing( char *ref, int *mem, char **seq, int len )
+{
+ char *tmpseq;
+ int i, j, k, p;
+
+// reporterr( "#### reflectsmoothing!!!!!\n" );
+
+// if( mem[1] != -1 ) reporterr( "original = %s\n", seq[mem[1]] );
+
+ tmpseq = calloc( len+1, sizeof( char ) );
+
+
+ for( j=1; (i=mem[j])!=-1; j++ )
+ {
+ eqpick( tmpseq, seq[i] );
+ for( k=0, p=0; p<len; k++ )
+ {
+ while( ref[p] == '=' ) seq[i][p++] = '=';
+ seq[i][p++] = tmpseq[k];
+ }
+ }
free( tmpseq );
- free( tmpgaplen );
+
+// if( mem[1] != -1 ) reporterr( "output = %s\n", seq[mem[1]] );
+
+// reporterr( "#### done!!!!!\n" );
}
-#if 0
-int samemember( int *mem, int *cand )
+/*
+ * smoothing1rightmulti
+ *
+ * Scans ref[1..len-2] for a character that is neither '+' nor '=' whose left
+ * neighbour is '+' and whose right neighbour is '='.  Each such character is
+ * moved to the last position of the run of '=' on its right, and '=' is
+ * written to the position it came from.  A hit is skipped when the run of
+ * '=' reaches the end of ref, or when the character after the run is '+'.
+ *
+ * len : number of characters in ref.  ref[len] is read by the scan and must
+ *       not be '=' (the callers in this file pass strlen( ref ), so it is
+ *       the terminating NUL).
+ * ref : string that is modified in place.
+ *
+ * Returns the number of characters that were moved.  A summary line is
+ * written through reporterr().  The process exits if the hit list cannot be
+ * allocated or if the destination is unexpectedly not '='.
+ */
+static int smoothing1rightmulti( int len, char *ref ) // slow!
 {
- int i, j;
+	int i, j, k;
+	int shiftfrom = -1;
+	int shiftto = -1;
+	int *hit = NULL;
+	int *grown;
+	int val = 0, nhit = 0;
+
+//	reporterr( "ref (1rightmulti) = %s\n", ref );
+
+	// Collect every candidate before ref is modified.  The break below is
+	// disabled, so all hits are gathered in left-to-right order.
+	for( i=1, nhit=0; i<len-1; i++ )
+	{
+		if( ref[i-1] == '+' && ( ref[i] != '+' && ref[i] != '=' ) && ref[i+1] == '=' )
+		{
+//			reporterr( "hit! i=%d, len=%d\n", i, len );
+			grown = realloc( hit, (nhit+1) * sizeof( int ) );
+			if( grown == NULL )
+			{
+				reporterr( "Cannot allocate hit in smoothing1rightmulti!\n" );
+				free( hit );
+				exit( 1 );
+			}
+			hit = grown;
+			hit[nhit] = i;
+			nhit += 1;
+//			break;
+		}
+	}
+	if( nhit == 0 ) return( 0 );
+
+
+	for( k=0; k<nhit; k++ )
+	{
+		// Find the last '=' of the run that starts right after the hit.
+		// j may reach len, where ref[len] (not '=') stops the scan.
+		for( j=hit[k]+1; j<=len; j++ )
+		{
+			if( ref[j] != '=' )
+			{
+				shiftto = j-1;
+				break;
+			}
+		}
+		if( j == len && ref[len-1] == '=' )
+		{
+			// The run of '=' extends to the end of ref: nothing to do.
+			// The format needs an argument; report the hit position.
+			reporterr( "hit[i].end = %d, j = len-1, skip!\n", hit[k] );
+			continue;
+		}
+
+		if( shiftto < len-1 && ref[shiftto+1] == '+' ) continue; // the move would be pointless
+
+		val += 1;
+		shiftfrom = hit[k];
+		if( ref[shiftto] != '=' ) // sanity check, to be removed later
+		{
+			reporterr( "Error in smoothing1right!\n" );
+			exit( 1 );
+		}
+		ref[shiftto] = ref[shiftfrom];
+		ref[shiftfrom] = '=';
+	}
+	free( hit );
+
+//	reporterr( "ref (1rightmulti) = %s\n", ref );
+	reporterr( " %d out of %d have been smoothed (right).\n", val, nhit );
+
+//	if( nhit > 1 ) exit( 1 );
+	return( val );
+}
+
+/*
+ * smoothing1leftmulti
+ *
+ * Mirror image of the rightward pass: scans ref[1..len-2] for a character
+ * that is neither '+' nor '=' whose left neighbour is '=' and whose right
+ * neighbour is '+'.  Each such character is moved to the first position of
+ * the run of '=' on its left, and '=' is written to the position it came
+ * from.  A hit is skipped when the run of '=' reaches the start of ref, or
+ * when the character before the run is '+'.
+ *
+ * len : number of characters in ref.
+ * ref : string that is modified in place.
+ *
+ * Returns the number of characters that were moved.  A summary line is
+ * written through reporterr().  The process exits if the hit list cannot be
+ * allocated or if the destination is unexpectedly not '='.
+ */
+static int smoothing1leftmulti( int len, char *ref ) // slow!
+{
+	int i, j, k;
+	int shiftfrom = -1;
+	int shiftto = -1;
+	int *hit = NULL;
+	int *grown;
+	int val = 0, nhit = 0;
+
+//	reporterr( "ref (1leftmulti) = %s\n", ref );
+
+	// Collect every candidate before ref is modified.  The break below is
+	// disabled, so all hits are gathered in left-to-right order.
+	for( i=1, nhit=0; i<len-1; i++ )
+	{
+		if( ref[i-1] == '=' && ( ref[i] != '+' && ref[i] != '=' ) && ref[i+1] == '+' )
+		{
+//			reporterr( "hit! i=%d, len=%d\n", i, len );
+			grown = realloc( hit, (nhit+1) * sizeof( int ) );
+			if( grown == NULL )
+			{
+				reporterr( "Cannot allocate hit in smoothing1leftmulti!\n" );
+				free( hit );
+				exit( 1 );
+			}
+			hit = grown;
+			hit[nhit] = i;
+			nhit += 1;
+//			break;
+		}
+	}
+	if( nhit == 0 ) return( 0 );
+
+	for( k=0; k<nhit; k++ )
+	{
+		// Find the first '=' of the run that ends right before the hit.
+		for( j=hit[k]-1; j>-1; j-- )
+		{
+			if( ref[j] != '=' )
+			{
+				shiftto = j+1;
+				break;
+			}
+		}
+		if( j == -1 && ref[0] == '=' )
+		{
+			// The run of '=' extends to the start of ref: nothing to do.
+			// The format needs an argument; report the hit position.
+			reporterr( "hit[i].end = %d, j = -1, skip!\n", hit[k] );
+			continue;
+		}
+
+		if( shiftto > 0 && ref[shiftto-1] == '+' ) continue; // the move would be pointless
+
+		val += 1;
+		shiftfrom = hit[k];
+		if( ref[shiftto] != '=' ) // sanity check, to be removed later
+		{
+			reporterr( "Error in smoothing1left!\n" );
+			exit( 1 );
+		}
+		ref[shiftto] = ref[shiftfrom];
+		ref[shiftfrom] = '=';
+
+	}
+	free( hit );
+
+//	reporterr( "ref (1leftmulti) = %s\n", ref );
+	reporterr( " %d out of %d have been smoothed (left).\n", val, nhit );
+
+
+//	if( nhit > 1 ) exit( 1 );
+	return( val );
+}
+
+void restorecommongapssmoothly( int njob, int n0, char **seq, int *ex1, int *ex2, int *gapmap, int alloclen, char gapchar )
+{
+ int *mem;
+ char *tmpseq;
+ char *cptr;
+ int *iptr;
+ int *tmpgapmap;
+ int i, j, k, len, rep1, rep2, len1, klim, leninserted;
+ int totalres;
+
+ if( n0 == 0 ) return;
+
+
+ mem = calloc( njob+1, sizeof( int ) ); // +1 ha iranai.
+ intcpy( mem, ex1 );
+ intcat( mem, ex2 );
+// tmpseq = calloc( alloclen+2, sizeof( char ) );
+// tmpgapmap = calloc( alloclen+2, sizeof( int ) );
+
+#if 0 // iranai
+ for( i=0; (k=mem[i])!=-1; i++ ) // iranai
+ reporterr( "mem[%d] = %d\n", i, k ); // iranai
+ if( i == njob ) // iranai
+ {
+ fprintf( stderr, "Error in restorecommongaps()\n" );
+ free( mem );
+ exit( 1 );
+ }
+#endif
+ rep1 = ex1[0];
+ rep2 = ex2[0];
+ len = strlen( seq[rep1] );
+ len1 = len+1;
+
+ tmpseq = calloc( alloclen, sizeof( char ) );
+ tmpgapmap = calloc( alloclen, sizeof( int ) );
#if 0
- fprintf( stderr, "mem = " );
- for( i=0; mem[i]>-1; i++ ) fprintf( stderr, "%d ", mem[i] );
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
+ reporterr( "seq[rep1] = %s\n", seq[rep1] );
+ reporterr( "seq[rep2] = %s\n", seq[rep2] );
+#endif
- fprintf( stderr, "cand = " );
- for( i=0; cand[i]>-1; i++ ) fprintf( stderr, "%d ", cand[i] );
- fprintf( stderr, "\n" );
+ for( k=0; (i=mem[k])!=-1; k++ )
+ {
+ cptr = tmpseq;
+ for( j=0; j<len1; j++ )
+ {
+ klim = gapmap[j];
+// for( k=0; k<gapmap[j]; k++ )
+ while( klim-- )
+ *(cptr++) = '+'; // ???
+ *(cptr++) = seq[i][j];
+ }
+ *cptr = 0;
+ strcpy( seq[i], tmpseq );
+ }
+#if 0
+ reporterr( "->\n" );
+ reporterr( "seq[rep1] = \n%s\n", seq[rep1] );
+ reporterr( "seq[rep2] = \n%s\n", seq[rep2] );
#endif
- for( i=0, j=0; mem[i]>-1; )
+ leninserted = strlen( seq[rep1] );
+#if 0
+ reporterr( "gapmap =\n" );
+ for(j=0; j<len1; j++)
{
- if( mem[i++] != cand[j++] ) return( 0 );
+ reporterr( "%d", gapmap[j] );
+ for( i=gapmap[j]; i>0; i-- ) reporterr( "-" );
}
+ reporterr( "\n" );
+#endif
- if( cand[j] == -1 )
+#if 0
+ resprev = 10000; // tekitou
+ while( 1 )
{
- return( 1 );
+ res = 0;
+// reporterr( "\nsmoothing1right..\n" );
+ res = (0<smoothing1right( leninserted, seq[rep1], gapmap, seq, ex1 ));
+// reporterr( "done. res = %d\n", res );
+// reporterr( "smoothing1right..\n" );
+ res += (0<smoothing1right( leninserted, seq[rep2], gapmap, seq, ex2 ));
+// reporterr( "done. res = %d\n", res );
+
+// reporterr( "smoothing1left..\n" );
+ res += (0<smoothing1left( leninserted, seq[rep1], gapmap, seq, ex1 ));
+// reporterr( "done. res = %d\n", res );
+// reporterr( "smoothing1left..\n" );
+ res += (0<smoothing1left( leninserted, seq[rep2], gapmap, seq, ex2 ));
+// reporterr( "done. res = %d\n", res );
+
+ reporterr( " Smoothing .. %d \n", res );
+ if( res >= resprev ) break;
+// if( res == 0 ) break;
+ resprev = res;
}
- else
+#else
+ totalres = 0;
+ totalres += smoothing1rightmulti( leninserted, seq[rep1] );
+ totalres += smoothing1leftmulti( leninserted, seq[rep1] );
+ if( totalres ) reflectsmoothing( seq[rep1], ex1, seq, leninserted );
+
+ totalres = 0;
+ totalres += smoothing1rightmulti( leninserted, seq[rep2] );
+ totalres += smoothing1leftmulti( leninserted, seq[rep2] );
+ if( totalres ) reflectsmoothing( seq[rep2], ex2, seq, leninserted );
+#endif
+
+ for( k=0; (i=mem[k])!=-1; k++ ) plus2gapchar( seq[i], gapchar );
+
+#if 0
+ reporterr( "->\n" );
+ reporterr( "seq[rep1] = \n%s\n", seq[rep1] );
+ reporterr( "seq[rep2] = \n%s\n", seq[rep2] );
+ reporterr( "gapmap =\n" );
+ for(j=0; j<len1; j++)
+ {
+ reporterr( "%d", gapmap[j] );
+ for( i=gapmap[j]; i>0; i-- ) reporterr( "-" );
+ }
+ reporterr( "\n" );
+#endif
+
+ iptr = tmpgapmap;
+ for( j=0; j<len1; j++ )
{
- return( 0 );
+ *(iptr++) = gapmap[j];
+ for( k=0; k<gapmap[j]; k++ )
+ *(iptr++) = 0;
}
+ *iptr = -1;
+
+ intcpy( gapmap, tmpgapmap );
+// iptr = tmpgapmap;
+// while( *iptr != -1 ) *gapmap++ = *iptr++;
+
+ free( mem );
+ free( tmpseq );
+ free( tmpgapmap );
}
-#else
-int samemember( int *mem, int *cand )
+
+void restorecommongaps( int njob, int n0, char **seq, int *ex1, int *ex2, int *gapmap, int alloclen, char gapchar )
{
- int i, j;
- int nm, nc;
-#if 0
- fprintf( stderr, "mem = " );
- for( i=0; mem[i]>-1; i++ ) fprintf( stderr, "%d ", mem[i] );
- fprintf( stderr, "\n" );
+ int *mem;
+ char *tmpseq;
+ char *cptr;
+ int *iptr;
+ int *tmpgapmap;
+ int i, j, k, len, rep, len1, klim;
+
- fprintf( stderr, "cand = " );
- for( i=0; cand[i]>-1; i++ ) fprintf( stderr, "%d ", cand[i] );
- fprintf( stderr, "\n" );
+ if( n0 == 0 ) return;
+
+
+ mem = calloc( njob+1, sizeof( int ) ); // +1 ha iranai.
+ intcpy( mem, ex1 );
+ intcat( mem, ex2 );
+// tmpseq = calloc( alloclen+2, sizeof( char ) );
+// tmpgapmap = calloc( alloclen+2, sizeof( int ) );
+
+#if 0 // iranai
+ for( i=0; (k=mem[i])!=-1; i++ ) // iranai
+ reporterr( "mem[%d] = %d\n", i, k ); // iranai
+ if( i == njob ) // iranai
+ {
+ fprintf( stderr, "Error in restorecommongaps()\n" );
+ free( mem );
+ exit( 1 );
+ }
#endif
+ rep = mem[0];
+ len = strlen( seq[rep] );
+ len1 = len+1;
+
+ tmpseq = calloc( alloclen, sizeof( char ) );
+ tmpgapmap = calloc( alloclen, sizeof( int ) );
- nm = 0; for( i=0; mem[i]>-1; i++ ) nm++;
- nc = 0; for( i=0; cand[i]>-1; i++ ) nc++;
- if( nm != nc ) return( 0 );
- for( i=0; mem[i]>-1; i++ )
+ for( k=0; (i=mem[k])!=-1; k++ )
{
- for( j=0; cand[j]>-1; j++ )
- if( mem[i] == cand[j] ) break;
- if( cand[j] == -1 ) return( 0 );
+ cptr = tmpseq;
+ for( j=0; j<len1; j++ )
+ {
+ klim = gapmap[j];
+// for( k=0; k<gapmap[j]; k++ )
+ while( klim-- )
+ *(cptr++) = gapchar; // ???
+ *(cptr++) = seq[i][j];
+ }
+ *cptr = 0;
+ strcpy( seq[i], tmpseq );
}
- if( mem[i] == -1 )
+ iptr = tmpgapmap;
+ for( j=0; j<len1; j++ )
{
- return( 1 );
+ *(iptr++) = gapmap[j];
+ for( k=0; k<gapmap[j]; k++ )
+ *(iptr++) = 0;
}
- else
+ *iptr = -1;
+
+ iptr = tmpgapmap;
+ while( *iptr != -1 ) *gapmap++ = *iptr++;
+
+ free( mem );
+ free( tmpseq );
+ free( tmpgapmap );
+}
+
+/*
+ * deletenewinsertions_whole_eq
+ *
+ * Removes, from both oseq and aseq, every column in which all `on` rows of
+ * oseq hold '-' or '='.  The column count is taken from strlen( oseq[0] ).
+ *
+ * on, an     : number of rows in oseq and aseq.
+ * oseq, aseq : rows are compacted in place and re-terminated.
+ * deletelist : may be NULL.  Otherwise deletelist[j] is (re)allocated and
+ *              filled, for row j of aseq, with the indices of the characters
+ *              that sat in removed columns, terminated by -1.  An index
+ *              counts only the characters of that row that are neither '-'
+ *              nor '='.  A NULL entry is allocated here when needed.
+ *
+ * Returns the number of columns removed.  Exits if memory runs out.
+ */
+int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, int **deletelist )
+{
+	int i, j, p, q, ndel;
+	int len = strlen( oseq[0] );
+	char *eqseq, tmpc;
+	int *grown;
+
+	eqseq = calloc( len+1, sizeof( char ) );
+	if( eqseq == NULL )
+	{
+		fprintf( stderr, "Cannot allocate eqseq in deletenewinsertions_whole_eq\n" );
+		exit( 1 );
+	}
+
+	// eqseq[i] is '=' when column i of oseq holds only '-' or '=', else 'o'.
+	for( i=0; i<len; i++ )
+	{
+		for( j=0; j<on; j++ )
+		{
+			tmpc = oseq[j][i];
+			if( tmpc != '-' && tmpc != '=' ) break;
+		}
+		eqseq[i] = ( j == on ) ? '=' : 'o';
+	}
+
+	if( deletelist )
+	{
+		for( j=0; j<an; j++ )
+		{
+			ndel = 0;
+			for( i=0,q=0; i<len; i++ )
+			{
+				tmpc = aseq[j][i];
+				if( tmpc == '-' || tmpc == '=' ) continue;
+				if( eqseq[i] == '=' )
+				{
+					// +2: room for this entry and the -1 terminator.
+					grown = realloc( deletelist[j], sizeof( int ) * (ndel+2) );
+					if( grown == NULL )
+					{
+						fprintf( stderr, "Cannot allocate deletelist in deletenewinsertions_whole_eq\n" );
+						exit( 1 );
+					}
+					deletelist[j] = grown;
+					deletelist[j][ndel] = q;
+					ndel++;
+				}
+				q++;
+			}
+			// Nothing recorded and no buffer from the caller: make room
+			// for the terminator instead of writing through NULL.
+			if( deletelist[j] == NULL )
+			{
+				deletelist[j] = calloc( 1, sizeof( int ) );
+				if( deletelist[j] == NULL )
+				{
+					fprintf( stderr, "Cannot allocate deletelist in deletenewinsertions_whole_eq\n" );
+					exit( 1 );
+				}
+			}
+			deletelist[j][ndel] = -1;
+		}
+	}
+
+	// Compact the kept columns to the left in every row.
+	for( i=0,p=0; i<len; i++ )
+	{
+		if( eqseq[i] != '=' )
+		{
+			if( p != i )
+			{
+				for( j=0; j<on; j++ ) oseq[j][p] = oseq[j][i];
+				for( j=0; j<an; j++ ) aseq[j][p] = aseq[j][i];
+			}
+			p++;
+		}
+	}
+	for( j=0; j<on; j++ ) oseq[j][p] = 0;
+	for( j=0; j<an; j++ ) aseq[j][p] = 0;
+
+	free( eqseq );
+
+	return( len-p );
+}
+
+/*
+ * deletenewinsertions_whole
+ *
+ * Removes, from both oseq and aseq, every column in which all `on` rows of
+ * oseq hold '-'.  The column count is taken from strlen( oseq[0] ).
+ *
+ * on, an     : number of rows in oseq and aseq.
+ * oseq, aseq : rows are compacted in place and re-terminated.
+ * deletelist : may be NULL.  Otherwise deletelist[j] is (re)allocated and
+ *              filled, for row j of aseq, with the indices of the characters
+ *              that sat in removed columns, terminated by -1.  An index
+ *              counts only the characters of that row that are not '-'.
+ *              A NULL entry is allocated here when needed.
+ *
+ * Returns the number of columns removed.  Exits if memory runs out.
+ */
+int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **deletelist )
+{
+	int i, j, p, q, ndel;
+	int len = strlen( oseq[0] );
+	char *eqseq, tmpc;
+	int *grown;
+
+	eqseq = calloc( len+1, sizeof( char ) );
+	if( eqseq == NULL )
+	{
+		fprintf( stderr, "Cannot allocate eqseq in deletenewinsertions_whole\n" );
+		exit( 1 );
+	}
+
+	// eqseq[i] is '=' when column i of oseq holds only '-', else 'o'.
+	for( i=0; i<len; i++ )
+	{
+		for( j=0; j<on; j++ )
+		{
+			tmpc = oseq[j][i];
+			if( tmpc != '-' ) break;
+		}
+		eqseq[i] = ( j == on ) ? '=' : 'o';
+	}
+
+	if( deletelist )
+	{
+		for( j=0; j<an; j++ )
+		{
+			ndel = 0;
+			for( i=0,q=0; i<len; i++ )
+			{
+				tmpc = aseq[j][i];
+				if( tmpc == '-' ) continue;
+				if( eqseq[i] == '=' )
+				{
+					// +2: room for this entry and the -1 terminator.
+					grown = realloc( deletelist[j], sizeof( int ) * (ndel+2) );
+					if( grown == NULL )
+					{
+						fprintf( stderr, "Cannot allocate deletelist in deletenewinsertions_whole\n" );
+						exit( 1 );
+					}
+					deletelist[j] = grown;
+					deletelist[j][ndel] = q;
+					ndel++;
+				}
+				q++;
+			}
+			// Nothing recorded and no buffer from the caller: make room
+			// for the terminator instead of writing through NULL.
+			if( deletelist[j] == NULL )
+			{
+				deletelist[j] = calloc( 1, sizeof( int ) );
+				if( deletelist[j] == NULL )
+				{
+					fprintf( stderr, "Cannot allocate deletelist in deletenewinsertions_whole\n" );
+					exit( 1 );
+				}
+			}
+			deletelist[j][ndel] = -1;
+		}
+	}
+
+	// Compact the kept columns to the left in every row.
+	for( i=0,p=0; i<len; i++ )
+	{
+		if( eqseq[i] != '=' )
+		{
+			if( p != i )
+			{
+				for( j=0; j<on; j++ ) oseq[j][p] = oseq[j][i];
+				for( j=0; j<an; j++ ) aseq[j][p] = aseq[j][i];
+			}
+			p++;
+		}
+	}
+	for( j=0; j<on; j++ ) oseq[j][p] = 0;
+	for( j=0; j<an; j++ ) aseq[j][p] = 0;
+
+	free( eqseq );
+
+	return( len-p );
+
+}
+
+
+#if 0
+int maskoriginalgaps( char *repseq, char *originallygapped )
+{
+ int i, p;
+ int len = strlen( repseq );
+// reporterr( "repseq = %s\n", repseq );
+ for( i=0,p=0; i<len; i++ )
+ {
+ if( repseq[i] == '=' )
+ {
+ if( originallygapped[p] == '-' )
+ {
+ repseq[i] = '-';
+ p++;
+ }
+ }
+ else
+ {
+ p++;
+ }
}
+ reporterr( "repseq = %s\n", repseq );
+exit( 1 );
}
+
+void restoregaponlysites( char *originallygapped, int n1, int n2, char **s1, char **s2, int rep )
+{
+ int i, j, p;
+ char *tmpnew;
+ int len;
+ reporterr( "originallygapped = %s\n", originallygapped );
+ reporterr( "s1[0] = %s\n", s1[0] );
+ reporterr( "s1[rep] = %s\n", s1[rep] );
+ reporterr( "s2[0] = %s\n", s2[0] );
+exit( 1 );
+
+ tmpnew = calloc( strlen( originallygapped )+1, sizeof( char ) );
+ len = strlen( s1[0] );
+
+ for( i=0,p=0; i<len; i++ )
+ {
+ reporterr( "i=%d, p=%d, s[]=%c, o[]=%c\n", i, p, s1[0][i], originallygapped[p] );
+ if( originallygapped[p] == 'o' )
+ {
+ tmpnew[p] = s1[0][i];
+ p++;
+ }
+ while( originallygapped[p] == '-' )
+ {
+ tmpnew[p] = '-';
+ p++;
+ }
+ }
+ reporterr( "s1[0] = %s\n", s1[0] );
+ reporterr( "tmpnew = %s\n", tmpnew );
+
+}
+
#endif
-int includemember( int *mem, int *cand ) // mem in cand
+/*
+ * Record which columns of the alignment s[0..n-1] consist only of gaps.
+ * The number of columns is taken from strlen( s[0] ).  For each column i,
+ * originallygapped[i] is set to '-' when all n sequences have '-' there and
+ * to 'o' otherwise; the map is then NUL-terminated, so the caller has to
+ * provide room for strlen( s[0] ) + 1 characters.
+ * Returns v, which is initialised to 0 and never modified.
+ */
+int recordoriginalgaps( char *originallygapped, int n, char **s )
{
int i, j;
+ int len = strlen( s[0] );
+ int v = 0;
+ for( i=0; i<len; i++ )
+ {
+// j reaches n only if no sequence has a non-gap character in column i
+ for( j=0; j<n; j++ ) if( s[j][i] != '-' ) break;
-#if 0
- fprintf( stderr, "mem = " );
- for( i=0; mem[i]>-1; i++ ) fprintf( stderr, "%d ", mem[i] );
- fprintf( stderr, "\n" );
+ if( j == n )
+ originallygapped[i] = '-';
+ else
+ originallygapped[i] = 'o';
+ }
+ originallygapped[i] = 0;
+ return( v );
+}
- fprintf( stderr, "cand = " );
- for( i=0; cand[i]>-1; i++ ) fprintf( stderr, "%d ", cand[i] );
- fprintf( stderr, "\n" );
-#endif
+/*
+ * Re-insert gap-only columns into each of the n sequences in seq.
+ * originalgaps is a column map in which '-' marks a column to be filled
+ * with '-' and any other character means "take the next character of the
+ * sequence".  Every seq[k] is overwritten in place with the expanded
+ * string, so it must be able to hold strlen( originalgaps ) + 1 characters.
+ */
+void restoreoriginalgaps( int n, char **seq, char *originalgaps )
+{
+	int fulllen = strlen( originalgaps );
+	char *expanded = calloc( fulllen+1, sizeof( char ) ); // zero-filled, so it stays terminated
+	int k;
+
+	for( k=0; k<n; k++ )
+	{
+		const char *mask = originalgaps;
+		const char *src = seq[k];
+		char *dst = expanded;
+
+		while( *mask )
+		{
+			if( *mask++ == '-' )
+				*dst++ = '-';
+			else
+				*dst++ = *src++;
+		}
+		strcpy( seq[k], expanded );
+	}
+	free( expanded );
+}
- for( i=0; mem[i]>-1; i++ )
+/*
+ * Write, for each of the nadd added sequences, a table that maps its
+ * residues to columns of the reference alignment.
+ *
+ * For sequence i the output to fp is a '>' header line with the name
+ * (leading character and the "_numo_e" / "_oe_" prefixes skipped), a fixed
+ * '#' legend line, and then one line per character of addbk[i]:
+ *   letter, 1-based position in addbk[i], and either '-' when that position
+ *   is listed in deletelist[i] (a -1 terminated list) or the 1-based column
+ *   in realn[i], where columns holding '-' in realn[i] are skipped.
+ *
+ * NOTE(review): when outnumber is set, the result of strstr( nameptr,
+ * "_numo_e" ) is used without a NULL check — confirm every name carries
+ * that tag.
+ * NOTE(review): the calloc result and the range of deletelist entries are
+ * not checked.
+ */
+void reconstructdeletemap( int nadd, char **addbk, int **deletelist, char **realn, FILE *fp, char **name )
+{
+ int i, j, p, len;
+ char *gapped, *nameptr, *tmpptr;
+
+ for( i=0; i<nadd; i++ )
{
- for( j=0; cand[j]>-1; j++ )
- if( mem[i] == cand[j] ) break;
- if( cand[j] == -1 ) return( 0 );
+ len = strlen( addbk[i] );
+ gapped = calloc( len+1, sizeof( char ) );
+// for( j=0; j<len; j++ ) gapped[j] = 'o'; // not needed
+// gapped[len] = 0; // not needed
+
+ nameptr = name[i] + 1;
+ if( outnumber )
+ nameptr = strstr( nameptr, "_numo_e" ) + 8;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because of = -> _
+
+ fprintf( fp, ">%s\n", nameptr );
+ fprintf( fp, "# letter, position in the original sequence, position in the reference alignment\n" );
+
+// reporterr( "addbk[%d] = %s\n", i, addbk[i] );
+// mark every deleted position of this sequence in gapped[]
+ for( j=0; (p=deletelist[i][j])!=-1; j++ )
+ {
+// reporterr( "deleting %d, %c\n", p, addbk[i][p] );
+ gapped[p] = '-';
+ }
+
+// reporterr( "addbk = %s\n", addbk[i] );
+// reporterr( "gapped = %s\n", gapped );
+
+// j walks the original sequence, p walks the columns of realn[i]
+ for( j=0,p=0; j<len; j++ )
+ {
+ while( realn[i][p] == '-' )
+ p++;
+
+ if( gapped[j] == '-' )
+ {
+ fprintf( fp, "%c, %d, -\n", addbk[i][j], j+1 ); // 1-origin
+ }
+ else
+ {
+ fprintf( fp, "%c, %d, %d\n", addbk[i][j], j+1, p+1 ); // 1-origin
+ p++;
+ }
+ }
+ free( gapped );
}
-// fprintf( stderr, "INCLUDED! mem[0]=%d\n", mem[0] );
- return( 1 );
}
--- /dev/null
+#include "mltaln.h"
+
+#define SMALLMEMORY 1
+
+#define DEBUG 0
+#define IODEBUG 0
+#define SCOREOUT 0
+
+static int nadd;
+static int treein;
+static int topin;
+static int treeout;
+static int distout;
+static int noalign;
+static int multidist;
+static int maxdist = 2; // scale -> 2bai
+static int allowlongadds;
+static int keeplength;
+static int ndeleted;
+static int mapout;
+static int smoothing;
+static double hitout;
+
+static int tuplesize;
+
+#define PLENFACA 0.01
+#define PLENFACB 10000
+#define PLENFACC 10000
+#define PLENFACD 0.1
+#define D6LENFACA 0.01
+#define D6LENFACB 2500
+#define D6LENFACC 2500
+#define D6LENFACD 0.1
+#define D10LENFACA 0.01
+#define D10LENFACB 1000000
+#define D10LENFACC 1000000
+#define D10LENFACD 0.0
+
+// Argument bundle typedef'd as thread_arg_t.  The code that fills and reads
+// it lies outside this part of the file, so individual fields are not
+// described here.
+// NOTE(review): presumably one instance is handed to each worker thread — confirm against the thread function.
+typedef struct _thread_arg
+{
+ int njob;
+ int nadd;
+ int *nlen;
+ int *follows;
+ char **name;
+ char **seq;
+ LocalHom **localhomtable;
+ double **iscore;
+ double **nscore;
+ int *istherenewgap;
+ int **newgaplist;
+ RNApair ***singlerna;
+ double *eff_kozo_mapped;
+ int alloclen;
+ Treedep *dep;
+ int ***topol;
+ double **len;
+ Addtree *addtree;
+ int **deletelist;
+#ifdef enablemultithread
+// the following members exist only when enablemultithread is defined
+ int *iaddshare;
+ int thread_no;
+ pthread_mutex_t *mutex_counter;
+#endif
+} thread_arg_t;
+
+
+#ifdef enablemultithread
+// Argument bundle typedef'd as gaplist2alnxthread_arg_t (compiled only when
+// enablemultithread is defined).
+// NOTE(review): by its name and members (seq, newgaplist, posmap, lenfull,
+// tmpseqlen) this presumably feeds gaplist2alnx() from worker threads, with
+// jobpospt as a shared job counter guarded by mutex — confirm at the use site.
+typedef struct _gaplist2alnxthread_arg
+{
+// int thread_no;
+ int ncycle;
+ int *jobpospt;
+ int tmpseqlen;
+ int lenfull;
+ char **seq;
+ int *newgaplist;
+ int *posmap;
+ pthread_mutex_t *mutex;
+} gaplist2alnxthread_arg_t;
+
+// Argument bundle typedef'd as distancematrixthread_arg_t (compiled only when
+// enablemultithread is defined).
+// NOTE(review): presumably used by the threaded distance-matrix calculation;
+// the consumer is not visible in this part of the file — confirm field roles there.
+typedef struct _distancematrixthread_arg
+{
+ int thread_no;
+ int njob;
+ int norg;
+ int *jobpospt;
+ int **pointt;
+ int *nogaplen;
+ double **imtx;
+ double **nmtx;
+ double *selfscore;
+ pthread_mutex_t *mutex;
+} distancematrixthread_arg_t;
+
+// A pair of int indices (i, j), typedef'd as Jobtable2d; a pointer to it is
+// the jobpospt member of dndprethread_arg_t.
+// NOTE(review): presumably the next (i, j) cell to be processed — confirm.
+typedef struct _jobtable2d
+{
+ int i;
+ int j;
+} Jobtable2d;
+
+// Argument bundle typedef'd as dndprethread_arg_t (compiled only when
+// enablemultithread is defined).  jobpospt points to a Jobtable2d index pair.
+// NOTE(review): the consuming thread function is outside this part of the
+// file; field roles should be confirmed there.
+typedef struct _dndprethread_arg
+{
+ int njob;
+ int thread_no;
+ double *selfscore;
+ double **mtx;
+ char **seq;
+ Jobtable2d *jobpospt;
+ pthread_mutex_t *mutex;
+} dndprethread_arg_t;
+
+#endif
+
+// One candidate region for realignment, typedef'd as Blocktorealign.
+// countnewres() fills start/end and increments nnewres;
+// dorealignment_tree() treats start..end as an inclusive column range
+// (its width is computed as end - start + 1).
+typedef struct _blocktorealign
+{
+ int start; // first column of the region
+ int end; // last column of the region (inclusive)
+ int nnewres; // counter bumped by countnewres() when gaplist[] is non-zero at this entry
+} Blocktorealign;
+
+/*
+ * Concatenate two -1 terminated int lists.
+ * The elements of o1 followed by the elements of o2 are written to res,
+ * which is then closed with -1.  res must have room for both lists plus
+ * the terminator.
+ */
+static void cnctintvec( int *res, int *o1, int *o2 )
+{
+	int nout = 0;
+	int k;
+
+	for( k=0; o1[k] != -1; k++ ) res[nout++] = o1[k];
+	for( k=0; o2[k] != -1; k++ ) res[nout++] = o2[k];
+	res[nout] = -1;
+}
+
+/*
+ * Walk the len+1 entries of realign/posmap/gaplist.
+ * For every entry that already has nnewres set or has a non-zero gaplist
+ * value, the region is recorded as
+ *   start = (previous posmap value) + 1   (0 for the first entry)
+ *   end   = posmap[entry] - 1
+ * and nnewres is incremented when gaplist is non-zero there.
+ */
+static void countnewres( int len, Blocktorealign *realign, int *posmap, int *gaplist )
+{
+	int col;
+	int nentry = len+1;
+	int nextstart = 0;
+
+	for( col=0; col<nentry; col++ )
+	{
+		Blocktorealign *blk = realign + col;
+		int hasgap = ( gaplist[col] != 0 );
+
+		if( blk->nnewres || hasgap )
+		{
+			blk->start = nextstart;
+			blk->end = posmap[col] - 1;
+		}
+		if( hasgap )
+			blk->nnewres++;
+
+		nextstart = posmap[col] + 1;
+	}
+}
+/*
+ * Pad the string s on the right with '-' until it is len characters long.
+ * s must have room for len + 1 characters.
+ * A string that is already len characters or longer is left unchanged;
+ * the former countdown loop did not stop in that case and wrote far past
+ * the end of the buffer.
+ */
+static void fillgap( char *s, int len )
+{
+	int orilen = strlen( s );
+	int npad = len - orilen;
+
+	s += orilen;
+	while( npad-- > 0 ) // "> 0" guards against a negative pad count
+		*s++ = '-';
+	*s = 0;
+}
+
+/*
+ * qsort() comparator for an array of char * that orders strings by
+ * decreasing length: returns -1 when *a is longer than *b, 1 when it is
+ * shorter and 0 when both have the same length.
+ */
+static int lencomp( const void *a, const void *b ) // may be slow
+{
+	const char *stra = *(char * const *)a;
+	const char *strb = *(char * const *)b;
+	int na = strlen( stra );
+	int nb = strlen( strb );
+
+	return( ( na < nb ) - ( na > nb ) );
+}
+
+/*
+ * Realign the column range block->start .. block->end (inclusive) of the
+ * njob sequences in fullseq and splice the result back in place.
+ *
+ * Steps, as performed below:
+ *  1. The block is cut out of every sequence and its gaps are removed.
+ *     Sequences that still contain characters are collected in tmpaln0
+ *     (they must have index >= norg, otherwise the function prints
+ *     "BUG!!!!" and exits); the others are only counted (n1) and later
+ *     filled with gaps.
+ *  2. Collected sequences are grouped by follows[i-norg]; each group is
+ *     sorted by decreasing length and aligned progressively with
+ *     profilealignment2().
+ *  3. The steps of topol are scanned for merges that join two different
+ *     groups; the resulting merge order (topolpick) is used to align the
+ *     groups with each other.
+ *  4. fullseq is enlarged if necessary (*fullseqlenpt is updated) and the
+ *     realigned block replaces the old one in every sequence.
+ *
+ * Returns zure, the old block width minus the new block width.
+ *
+ * Fix: the gap-only branch used to initialise tmpaln1[n0]; the rows read
+ * later are tmpaln1[0..n1-1], so the row index is now n1.
+ */
+static int dorealignment_tree( Blocktorealign *block, char **fullseq, int *fullseqlenpt, int norg, int ***topol, int *follows )
+{
+	int i, j, k, posinold, newlen, *nmem;
+	int n0, n1, localloclen, nhit, hit1, hit2;
+	int *pickhistory;
+	int nprof1, nprof2, pos, zure;
+	char **prof1, **prof2;
+	int *iinf0, *iinf1;
+	int *group, *nearest, *g2n, ngroup;
+	char ***mem;
+	static char **tmpaln0 = NULL;
+	static char **tmpaln1 = NULL;
+	static char **tmpseq;
+	int ***topolpick;
+	int *tmpint;
+	int *intptr, *intptrx;
+	char *tmpseq0, *cptr, **cptrptr;
+
+
+	localloclen = 4 * ( block->end - block->start + 1 ); // too large?
+	tmpaln0 = AllocateCharMtx( njob, localloclen );
+	tmpaln1 = AllocateCharMtx( njob, localloclen );
+	tmpseq = AllocateCharMtx( 1, *fullseqlenpt * 4 );
+	iinf0 = AllocateIntVec( njob );
+	iinf1 = AllocateIntVec( njob );
+	nearest = AllocateIntVec( njob ); // more than needed
+
+	posinold = block->start;
+
+	// Step 1: split the sequences into those with characters in the block
+	// (tmpaln0 / iinf0) and those that are gap-only there (tmpaln1 / iinf1).
+	n0 = 0;
+	n1 = 0;
+	for( i=0; i<njob; i++ )
+	{
+		strncpy( tmpseq[0], fullseq[i] + block->start, block->end - block->start + 1 );
+		tmpseq[0][block->end - block->start + 1] = 0;
+		commongappick( 1, tmpseq );
+		if( tmpseq[0][0] != 0 )
+		{
+			if( i < norg )
+			{
+				fprintf( stderr, "BUG!!!!\n" );
+				exit( 1 );
+			}
+			strcpy( tmpaln0[n0], tmpseq[0] );
+			iinf0[n0] = i;
+			nearest[n0] = follows[i-norg];
+			n0++;
+		}
+		else
+		{
+			strcpy( tmpaln1[n1], "" ); // row n1, matching iinf1[n1]
+			iinf1[n1] = i;
+			n1++;
+		}
+	}
+
+	// Step 2: group the collected sequences by their 'nearest' value.
+	// mem[g] is a NULL-terminated list of pointers into tmpaln0.
+	mem = AllocateCharCub( n0, n0+1, 0 ); // more than needed
+	nmem = AllocateIntVec( n0 ); // more than needed
+	g2n = AllocateIntVec( n0 ); // more than needed
+	group = AllocateIntVec( n0 ); // more than needed
+	for( i=0; i<n0; i++ ) mem[i][0] = NULL;
+	for( i=0; i<n0; i++ ) nmem[i] = 0;
+	ngroup = 0;
+	for( i=0; i<n0; i++ )
+	{
+		for( j=0; j<i; j++ ) if( nearest[j] == nearest[i] ) break;
+		if( j == i ) group[i] = ngroup++;
+		else group[i] = group[j];
+
+		for( j=0; mem[group[i]][j]; j++ )
+			;
+		mem[group[i]][j] = tmpaln0[i];
+		mem[group[i]][j+1] = NULL;
+		nmem[group[i]]++;
+		g2n[group[i]] = nearest[i];
+//		fprintf( stderr, "%d -> %d -> group%d\n", i, nearest[i], group[i] );
+//		fprintf( stderr, "mem[%d][%d] = %s\n", group[i], j, mem[group[i]][j] );
+	}
+
+	// longest member first within each group
+	for( i=0; i<ngroup; i++ )
+	{
+//		fprintf( stderr, "before sort:\n" );
+//		for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "%s\n", mem[i][j] );
+//		fprintf( stderr, "\n" );
+		qsort( mem[i], nmem[i], sizeof( char * ), lencomp );
+//		fprintf( stderr, "after sort:\n" );
+//		for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "%s\n", mem[i][j] );
+//		fprintf( stderr, "\n" );
+	}
+
+#if 0
+	for( i=1; i<n0; i++ )
+	{
+		profilealignment( 1, n1, i, tmpaln0+i, tmpaln1, tmpaln0, localloclen, alg );
+	}
+	newlen = strlen( tmpaln0[0] );
+	for( i=0; i<n1; i++ ) eq2dash( tmpaln1[i] );
+#else
+//	newlen = 0;
+	// progressive alignment inside each group
+	for( i=0; i<ngroup; i++ )
+	{
+//		for( k=0; mem[i][k]; k++ ) fprintf( stderr, "mem[%d][%d] = %s\n", i, j, mem[i][k] );
+
+		for( j=1; j<nmem[i]; j++ )
+		{
+			profilealignment2( 1, j, mem[i]+j, mem[i], localloclen, alg );
+		}
+//		for( j=0; j<nmem[i]; j++ ) fprintf( stderr, "j=%d, %s\n", j, mem[i][j] );
+
+#if 0 // needed
+		if( ( j = strlen( mem[i][0] ) ) > newlen ) newlen = j;
+		for( j=0; j<=i; j++ )
+		{
+			for( k=0; mem[j][k]; k++ )
+				fillgap( mem[j][k], newlen );
+		}
+#endif
+
+	}
+#if 0
+	fprintf( stderr, "After ingroupalignment (original order):\n" );
+	for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] );
+#endif
+#endif
+
+	// Step 3: derive the order in which groups are merged from topol.
+	topolpick = AllocateIntCub( ngroup, 2, ngroup );
+	pickhistory = AllocateIntVec( ngroup );
+	tmpint = AllocateIntVec( 2 );
+	prof1 = AllocateCharMtx( n0, 0 );
+	prof2 = AllocateCharMtx( n0, 0 );
+	for( i=0; i<ngroup; i++ )
+	{
+		topolpick[i][0][0] = -1;
+		topolpick[i][1][0] = -1;
+		pickhistory[i] = -1;
+	}
+
+	nhit = 0;
+	for( i=0; i<norg-1; i++ )
+	{
+		// first group whose g2n value occurs on the left side of step i
+		for( intptr=topol[i][0]; *intptr>-1; intptr++ )
+		{
+			for( intptrx=g2n,k=0; k<ngroup; intptrx++,k++ )
+			{
+				if( *intptr == *intptrx )
+				{
+					hit1 = k;
+					goto exitloop1;
+				}
+			}
+		}
+		continue;
+		exitloop1:
+//		fprintf( stderr, "hit1! group%d -> %d\n", k, topol[i][0][j] );
+
+		// first group whose g2n value occurs on the right side of step i
+		for( intptr=topol[i][1]; *intptr>-1; intptr++ )
+		{
+			for( intptrx=g2n,k=0; k<ngroup; intptrx++,k++ )
+			{
+				if( *intptr == *intptrx )
+				{
+					hit2 = k;
+					goto exitloop2;
+				}
+			}
+		}
+		continue;
+		exitloop2:
+
+		// each side is either a single group or everything merged at the
+		// step in which that group was last involved
+		if( pickhistory[hit1] == -1 )
+		{
+			topolpick[nhit][0][0] = hit1;
+			topolpick[nhit][0][1] = -1;
+		}
+		else
+		{
+			intcpy( topolpick[nhit][0], topolpick[pickhistory[hit1]][0] );
+			intcat( topolpick[nhit][0], topolpick[pickhistory[hit1]][1] );
+		}
+		if( pickhistory[hit2] == -1 )
+		{
+			topolpick[nhit][1][0] = hit2;
+			topolpick[nhit][1][1] = -1;
+		}
+		else
+		{
+			intcpy( topolpick[nhit][1], topolpick[pickhistory[hit2]][0] );
+			intcat( topolpick[nhit][1], topolpick[pickhistory[hit2]][1] );
+		}
+
+		pickhistory[hit1] = nhit;
+		pickhistory[hit2] = nhit;
+		nhit++;
+//		g2n[hit1] = -1;
+//		g2n[hit2] = -1;
+
+//		fprintf( stderr, "hit2! group%d -> %d\n", k, topol[i][1][j] );
+
+#if 0
+		fprintf( stderr, "\nHIT!!! \n" );
+		fprintf( stderr, "\nSTEP %d\n", i );
+		for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][0][j] );
+		fprintf( stderr, "\n" );
+		for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topol[i][1][j] );
+		fprintf( stderr, "\n" );
+#endif
+	}
+
+	// align the groups with each other in the recorded order
+	for( i=0; i<ngroup-1; i++ )
+	{
+#if 0
+		fprintf( stderr, "\npickSTEP %d\n", i );
+		for( j=0; topolpick[i][0][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][0][j] );
+		fprintf( stderr, "\n" );
+		for( j=0; topolpick[i][1][j]>-1; j++ ) fprintf( stderr, "%3d ", topolpick[i][1][j] );
+		fprintf( stderr, "\n" );
+#endif
+
+		pos = 0;
+//		for( j=0; topolpick[i][0][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][0][j]][k]); k++ ) prof1[pos++] = cptr;
+		for( intptr=topolpick[i][0]; *intptr>-1; intptr++ )
+			for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ )
+				prof1[pos++] = cptr;
+		nprof1 = pos;
+		pos = 0;
+//		for( j=0; topolpick[i][1][j]>-1; j++ ) for( k=0; (cptr=mem[topolpick[i][1][j]][k]); k++ ) prof2[pos++] = cptr;
+		for( intptr=topolpick[i][1]; *intptr>-1; intptr++ )
+			for( cptrptr=mem[*intptr]; (cptr=*cptrptr); cptrptr++ )
+				prof2[pos++] = cptr;
+		nprof2 = pos;
+
+
+		profilealignment2( nprof1, nprof2, prof1, prof2, localloclen, alg );
+#if 0
+		for( j=0; j<nprof1; j++ ) fprintf( stderr, "prof1[%d] = %s\n", j, prof1[j] );
+		for( j=0; j<nprof2; j++ ) fprintf( stderr, "prof2[%d] = %s\n", j, prof2[j] );
+		fprintf( stderr, "done.\n" );
+#endif
+	}
+	newlen = strlen( tmpaln0[0] );
+	for( j=0; j<n1; j++ ) fillgap( tmpaln1[j], newlen );
+
+#if 0
+	fprintf( stderr, "After rerealignment (original order):\n" );
+	for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] );
+#endif
+
+//	newlen = strlen( tmpaln0[0] );
+	zure = ( block->end - block->start + 1 - newlen );
+//	fprintf( stderr, "zure = %d, localloclen=%d, newlen=%d\n", zure, localloclen, newlen );
+
+
+	// Step 4: make sure the spliced sequences fit, then write them back.
+	if( *fullseqlenpt < strlen( fullseq[0] ) - (block->end-block->start+1) + newlen + 1 )
+	{
+		*fullseqlenpt = strlen( fullseq[0] ) * 2;
+		fprintf( stderr, "reallocating..." );
+		for( i=0; i<njob; i++ )
+		{
+			fullseq[i] = realloc( fullseq[i], *fullseqlenpt * sizeof( char ) );
+			if( !fullseq[i] )
+			{
+				fprintf( stderr, "Cannot reallocate seq[][]\n" );
+				exit( 1 );
+			}
+		}
+		fprintf( stderr, "done.\n" );
+	}
+
+
+	tmpseq0 = tmpseq[0];
+	posinold = block->end+1;
+	for( i=0; i<n0; i++ )
+	{
+		// new block + unchanged tail, copied over the old block and tail
+		strncpy( tmpseq0, tmpaln0[i], newlen );
+		strcpy( tmpseq0+newlen, fullseq[iinf0[i]] + posinold );
+		strcpy( fullseq[iinf0[i]]+block->start, tmpseq0 );
+	}
+	for( i=0; i<n1; i++ )
+	{
+//		eq2dash( tmpaln1[i] );
+		strncpy( tmpseq0, tmpaln1[i], newlen );
+		strcpy( tmpseq0+newlen, fullseq[iinf1[i]] + posinold );
+		strcpy( fullseq[iinf1[i]]+block->start, tmpseq0 );
+	}
+	FreeCharMtx( tmpaln0 );
+	FreeCharMtx( tmpaln1 );
+	FreeCharMtx( tmpseq );
+	for( i=0; i<n0; i++ )
+	{
+//		for( j=0; j<njob; j++ ) free( mem[i][j] );
+		free( mem[i] );
+	}
+	free( mem );
+	free( nmem );
+	free( iinf0 );
+	free( iinf1 );
+	free( group );
+	free( g2n );
+	free( prof1 );
+	free( prof2 );
+	free( nearest );
+	FreeIntCub( topolpick );
+	free( pickhistory );
+	free( tmpint );
+
+	return( zure );
+}
+
+
+#if 0
+static int dorealignment( Blocktorealign *block, char **fullseq, int alloclen, int fullseqlen, int norg )
+{
+ int i, posinnew, posinold, newlen;
+ int n0, n1;
+ int zure;
+ static int *iinf0, *iinf1;
+ static char **tmpaln0 = NULL;
+ static char **tmpaln1 = NULL;
+ static char **tmpseq;
+ char *opt, *npt;
+
+ if( tmpaln0 == NULL )
+ {
+ tmpaln0 = AllocateCharMtx( njob, alloclen );
+ tmpaln1 = AllocateCharMtx( njob, alloclen );
+ tmpseq = AllocateCharMtx( 1, fullseqlen );
+ iinf0 = AllocateIntVec( njob );
+ iinf1 = AllocateIntVec( njob );
+ }
+ posinold = block->start;
+
+
+ n0 = 0;
+ n1 = 0;
+ for( i=0; i<njob; i++ )
+ {
+ strncpy( tmpseq[0], fullseq[i] + block->start, block->end - block->start + 1 );
+ tmpseq[0][block->end - block->start + 1] = 0;
+ commongappick( 1, tmpseq );
+// if( strlen( tmpseq[0] ) > 0 )
+ if( tmpseq[0][0] != 0 )
+ {
+ if( i < norg )
+ {
+ fprintf( stderr, "BUG!!!!\n" );
+ exit( 1 );
+ }
+ strcpy( tmpaln0[n0], tmpseq[0] );
+ iinf0[n0] = i;
+ n0++;
+ }
+ else
+ {
+ strcpy( tmpaln1[n0], "" );
+ iinf1[n1] = i;
+ n1++;
+ }
+ }
+
+
+ for( i=1; i<n0; i++ )
+ {
+ profilealignment( 1, n1, i, tmpaln0+i, tmpaln1, tmpaln0, alloclen, alg ); // n1 ha allgap
+ }
+
+#if 1
+ fprintf( stderr, "After realignment:\n" );
+ for( i=0; i<n0; i++ ) fprintf( stderr, "%s\n", tmpaln0[i] );
+// for( i=0; i<n1; i++ ) fprintf( stderr, "%s\n", tmpaln1[i] );
+#endif
+
+ newlen = strlen( tmpaln0[0] );
+ for( i=0; i<n0; i++ ) strncpy( fullseq[iinf0[i]]+block->start, tmpaln0[i], newlen );
+ for( i=0; i<n1; i++ )
+ {
+ eq2dash( tmpaln1[i] );
+ strncpy( fullseq[iinf1[i]] + block->start, tmpaln1[i], newlen );
+ }
+
+ posinold = block->end+1;
+ posinnew = block->start + newlen;
+
+
+ zure = ( block->end - block->start + 1 - strlen( tmpaln0[0] ) );
+
+ for( i=0; i<njob; i++ )
+ {
+#if 0
+ strcpy( fullseq[i]+posinnew, fullseq[i]+posinold ); // ??
+#else
+ opt = fullseq[i] + posinold;
+ npt = fullseq[i] + posinnew;
+ while( ( *npt++ = *opt++ ) );
+ *npt = 0;
+#endif
+ }
+
+ return( zure );
+}
+#endif
+
+/*
+ * Shift a position map by the gaps that were inserted in front of each
+ * position.
+ * For every k in 0..len (len+1 entries) posmap[k] is increased by
+ * gaplist[0] + ... + gaplist[k].
+ *
+ * The update is done in place: each entry depends only on its own old
+ * value and the running sum, so no scratch array is required.  This also
+ * removes an allocation whose result was used without a NULL check.
+ */
+static void adjustposmap( int len, int *posmap, int *gaplist )
+{
+	int k;
+	int zure = 0; // running total of inserted gap columns
+
+	for( k=0; k<=len; k++ )
+	{
+		zure += gaplist[k];
+		posmap[k] += zure;
+	}
+}
+
+/*
+ * Build a from s using len+1 pairs ( l[k], p[k] ).
+ * For each k: l[k] characters are copied from s, then '=' is appended
+ * until the running position (which starts at the previous p value + 1 and
+ * advances by one per copied character) reaches p[k]; for k < len one more
+ * character is then copied from s.  a is NUL-terminated at the end.
+ * Returns 1 if at least one '=' was written, otherwise 0.
+ */
+static int insertgapsbyotherfragments_compact( int len, char *a, char *s, int *l, int *p )
+{
+	int col, pos, ncopy;
+	int prevp = -1;
+	int realignment = 0;
+
+	for( col=0; ; col++ )
+	{
+		pos = prevp + 1;
+
+		// characters carried over unchanged
+		for( ncopy=l[col]; ncopy; ncopy-- )
+		{
+			*a++ = *s++;
+			pos++;
+		}
+		// whatever is still missing up to p[col] becomes '='
+		for( ; pos<p[col]; pos++ )
+		{
+			*a++ = '=';
+			realignment = 1;
+		}
+
+		if( col >= len ) break; // the trailing entry has no character of its own
+
+		*a++ = *s++;
+		prevp = p[col];
+	}
+	*a = 0;
+	return( realignment );
+}
+
+/*
+ * Fill c[0..len+1] from the gap list l and the position map p.
+ * For each entry, "room" is the distance p[k] - (previous p) - 1, with the
+ * previous p taken as -1 for the first entry.  If both room and l[k] are
+ * positive, c[k] is the part of l[k] that exceeds room (0 if it fits);
+ * otherwise c[k] is simply l[k].
+ */
+void makegaplistcompact( int len, int *p, int *c, int *l )
+{
+	int k;
+	int nentry = len+2;
+	int prep = -1;
+
+	for( k=0; k<nentry; k++ )
+	{
+		int room = p[k] - prep - 1;
+		int want = l[k];
+
+		if( room <= 0 || want <= 0 )
+			c[k] = want;
+		else if( room < want )
+			c[k] = want - room;
+		else
+			c[k] = 0;
+
+		prep = p[k];
+	}
+}
+
+
+/*
+ * Expand sequence s into a according to a gap list and a position map.
+ *
+ * For each of the first len entries: l[k] gap characters ('-') are written,
+ * then the characters of s between the previous mapped position and p[k]
+ * are copied, then s[p[k]] itself.  The final entry ( l[len], p[len] )
+ * contributes its gaps and the characters of s up to, but not including,
+ * p[len].  a is NUL-terminated.
+ *
+ * lenlimit is the largest number of characters that may be written before
+ * the terminator; when it would be exceeded the function prints a message
+ * to stderr and calls exit( 1 ).
+ *
+ * Fix: the check for the single character s[p[k]] compared the length
+ * before it was incremented, so the output could become one character
+ * longer than the other checks permit.  It now tests the incremented
+ * length like the rest.
+ */
+void gaplist2alnx( int len, char *a, char *s, int *l, int *p, int lenlimit )
+{
+	int gaplen;
+	int pos, pi, posl;
+	int prevp = -1;
+	int reslen = 0;
+	char *sp;
+
+	while( len-- )
+	{
+		gaplen = *l++;
+		pi = *p++;
+
+		if( (reslen+=gaplen) > lenlimit )
+		{
+			fprintf( stderr, "Length over. Please recompile!\n" );
+			exit( 1 );
+		}
+		while( gaplen-- ) *a++ = '-';
+
+		// characters of s that lie between two mapped positions
+		pos = prevp + 1;
+		sp = s + pos;
+		if( ( posl = pi - pos ) )
+		{
+			if( ( reslen += posl ) > lenlimit )
+			{
+				fprintf( stderr, "Length over. Please recompile\n" );
+				exit( 1 );
+			}
+			while( posl-- ) *a++ = *sp++;
+		}
+
+		// the mapped character itself
+		if( ++reslen > lenlimit )
+		{
+			fprintf( stderr, "Length over. Please recompile\n" );
+			exit( 1 );
+		}
+		*a++ = *sp;
+		prevp = pi;
+	}
+
+	// trailing entry: gaps and remaining characters, no mapped character
+	gaplen = *l;
+	pi = *p;
+	if( (reslen+=gaplen) > lenlimit )
+	{
+		fprintf( stderr, "Length over. Please recompile\n" );
+		exit( 1 );
+	}
+	while( gaplen-- ) *a++ = '-';
+
+	pos = prevp + 1;
+	sp = s + pos;
+	if( ( posl = pi - pos ) )
+	{
+		if( ( reslen += posl ) > lenlimit )
+		{
+			fprintf( stderr, "Length over. Please recompile\n" );
+			exit( 1 );
+		}
+		while( posl-- ) *a++ = *sp++;
+	}
+	*a = 0;
+}
+
+/*
+ * Convert the '=' runs of the string a into a list of counts.
+ * The current list entry is incremented once for every '=' of a run (the
+ * first entry is incremented as found, so the caller must have set it);
+ * each following entry is started at 0.  A non-'=' character ends a run
+ * and is skipped.  The entry opened after the last run is overwritten
+ * with -1 as terminator.
+ */
+static void makenewgaplist( int *l, char *a )
+{
+	for( ;; )
+	{
+		int run = 0;
+
+		while( a[run] == '=' ) run++;
+		if( run )
+		{
+			*l += run;
+			a += run;
+		}
+
+		l++;
+		*l = 0;
+
+		if( *a == 0 ) break;
+		a++;
+	}
+	*l = -1;
+}
+
+
+/*
+ * Parse the command line and set the option variables of this program.
+ *
+ * All option variables listed at the top are first reset to their defaults.
+ * Then every argv element that starts with '-' is scanned character by
+ * character: options that take a value read the following argv element,
+ * decrement argc and jump to nextoption; plain flags 'break' out of the
+ * switch so that the next character of the same element is examined.
+ * An unknown option prints "illegal option" and sets argc to 0, which ends
+ * the scan and leads to the exit below.
+ *
+ * After the options, a single remaining argument is converted with atof()
+ * and stored in cut.  Any other leftover, or one of the conflicting option
+ * combinations tested at the end, prints a message to stderr and calls
+ * exit( 1 ).
+ *
+ * NOTE(review): options that take a value dereference *++argv without
+ * checking that another argument follows.
+ * NOTE(review): treeout and noalign are set by 't' and 'T' but are not
+ * assigned in the default block below.
+ */
+void arguments( int argc, char *argv[] )
+{
+ int c;
+
+ nthread = 1;
+ outnumber = 0;
+ scoreout = 0;
+ treein = 0;
+ topin = 0;
+ rnaprediction = 'm';
+ rnakozo = 0;
+ nevermemsave = 0;
+ inputfile = NULL;
+ addfile = NULL;
+ addprofile = 1;
+ fftkeika = 0;
+ constraint = 0;
+ nblosum = 62;
+ fmodel = 0;
+ calledByXced = 0;
+ devide = 0;
+ use_fft = 0; // caution
+ force_fft = 0;
+ fftscore = 1;
+ fftRepeatStop = 0;
+ fftNoAnchStop = 0;
+ weight = 3;
+ utree = 1;
+ tbutree = 1;
+ refine = 0;
+ check = 1;
+ cut = 0.0;
+ disp = 0;
+ outgap = 1;
+ alg = 'A';
+ mix = 0;
+ tbitr = 0;
+ scmtd = 5;
+ tbweight = 0;
+ tbrweight = 3;
+ checkC = 0;
+ treemethod = 'X';
+ sueff_global = 0.1;
+ contin = 0;
+ scoremtx = 1;
+ kobetsubunkatsu = 0;
+ dorp = NOTSPECIFIED;
+ ppenalty = NOTSPECIFIED;
+ penalty_shift_factor = 1000.0;
+ ppenalty_ex = NOTSPECIFIED;
+ poffset = NOTSPECIFIED;
+ kimuraR = NOTSPECIFIED;
+ pamN = NOTSPECIFIED;
+ geta2 = GETA2;
+ fftWinSize = NOTSPECIFIED;
+ fftThreshold = NOTSPECIFIED;
+ RNAppenalty = NOTSPECIFIED;
+ RNAppenalty_ex = NOTSPECIFIED;
+ RNApthr = NOTSPECIFIED;
+ TMorJTT = JTT;
+ consweight_multi = 1.0;
+ consweight_rna = 0.0;
+ nadd = 0;
+ multidist = 0;
+ tuplesize = -1;
+ legacygapcost = 0;
+ allowlongadds = 0;
+ keeplength = 0;
+ mapout = 0;
+ smoothing = 0;
+ distout = 0;
+ hitout = 0.0;
+ nwildcard = 0;
+
+// outer loop: one argv element per pass; inner loop: one option letter per pass
+ while( --argc > 0 && (*++argv)[0] == '-' )
+ {
+ while ( ( c = *++argv[0] ) )
+ {
+ switch( c )
+ {
+ case 'i':
+ inputfile = *++argv;
+ fprintf( stderr, "inputfile = %s\n", inputfile );
+ --argc;
+ goto nextoption;
+ case 'I':
+ nadd = myatoi( *++argv );
+ fprintf( stderr, "nadd = %d\n", nadd );
+ --argc;
+ goto nextoption;
+// the penalty-type values below are given as real numbers and stored as
+// integers after multiplication by 1000
+ case 'e':
+ RNApthr = (int)( atof( *++argv ) * 1000 - 0.5 );
+ --argc;
+ goto nextoption;
+ case 'o':
+ RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
+ --argc;
+ goto nextoption;
+ case 'f':
+ ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
+// fprintf( stderr, "ppenalty = %d\n", ppenalty );
+ --argc;
+ goto nextoption;
+ case 'Q':
+ penalty_shift_factor = atof( *++argv );
+ --argc;
+ goto nextoption;
+ case 'g':
+ ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
+ fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
+ --argc;
+ goto nextoption;
+ case 'h':
+ poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
+// fprintf( stderr, "poffset = %d\n", poffset );
+ --argc;
+ goto nextoption;
+ case 'k':
+ kimuraR = myatoi( *++argv );
+ fprintf( stderr, "kappa = %d\n", kimuraR );
+ --argc;
+ goto nextoption;
+ case 'b':
+ nblosum = myatoi( *++argv );
+ scoremtx = 1;
+ fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
+ --argc;
+ goto nextoption;
+ case 'j':
+ pamN = myatoi( *++argv );
+ scoremtx = 0;
+ TMorJTT = JTT;
+ fprintf( stderr, "jtt/kimura %d\n", pamN );
+ --argc;
+ goto nextoption;
+ case 'm':
+ pamN = myatoi( *++argv );
+ scoremtx = 0;
+ TMorJTT = TM;
+ fprintf( stderr, "tm %d\n", pamN );
+ --argc;
+ goto nextoption;
+ case 'l':
+ fastathreshold = atof( *++argv );
+ constraint = 2;
+ --argc;
+ goto nextoption;
+ case 'r':
+ consweight_rna = atof( *++argv );
+ rnakozo = 1;
+ --argc;
+ goto nextoption;
+ case 'c':
+ consweight_multi = atof( *++argv );
+ --argc;
+ goto nextoption;
+ case 'C':
+ nthread = myatoi( *++argv );
+ fprintf( stderr, "nthread = %d\n", nthread );
+ --argc;
+ goto nextoption;
+#if 0
+ case 'R':
+ rnaprediction = 'r';
+ break;
+ case 's':
+ RNAscoremtx = 'r';
+ break;
+#endif
+#if 1
+ case 'a':
+ fmodel = 1;
+ break;
+#endif
+ case 'K':
+ addprofile = 0;
+ break;
+ case 'y':
+ distout = 1;
+ break;
+ case '^':
+ hitout = atof( *++argv );
+ --argc;
+ goto nextoption;
+ case 't':
+ treeout = 1;
+ break;
+ case 'T':
+ noalign = 1;
+ break;
+ case 'D':
+ dorp = 'd';
+ break;
+ case 'P':
+ dorp = 'p';
+ break;
+#if 1
+ case 'O':
+ outgap = 0;
+ break;
+#else
+ case 'O':
+ fftNoAnchStop = 1;
+ break;
+#endif
+ case 'S':
+ scoreout = 1;
+ break;
+#if 0
+ case 'e':
+ fftscore = 0;
+ break;
+ case 'r':
+ fmodel = -1;
+ break;
+ case 'R':
+ fftRepeatStop = 1;
+ break;
+ case 's':
+ treemethod = 's';
+ break;
+#endif
+ case 'X':
+ treemethod = 'X';
+ sueff_global = atof( *++argv );
+ fprintf( stderr, "sueff_global = %f\n", sueff_global );
+ --argc;
+ goto nextoption;
+ case 'E':
+ treemethod = 'E';
+ break;
+ case 'q':
+ treemethod = 'q';
+ break;
+ case 'n' :
+ outnumber = 1;
+ break;
+#if 0
+ case 'a':
+ alg = 'a';
+ break;
+ case 'Q':
+ alg = 'Q';
+ break;
+#endif
+ case 'H':
+ alg = 'H';
+ break;
+ case 'A':
+ alg = 'A';
+ break;
+ case 'M':
+ alg = 'M';
+ break;
+ case 'N':
+ nevermemsave = 1;
+ break;
+ case 'B': // required! for memopt -M -B
+ break;
+ case 'F':
+ use_fft = 1;
+ break;
+ case 'G':
+ force_fft = 1;
+ use_fft = 1;
+ break;
+ case 'U':
+ treein = 1;
+ break;
+ case 'V':
+ allowlongadds = 1;
+ break;
+ case 'p':
+ smoothing = 1;
+ break;
+#if 0
+ case 'V':
+ topin = 1;
+ break;
+#endif
+ case 'u':
+ tbrweight = 0;
+ weight = 0;
+ break;
+ case 'v':
+ tbrweight = 3;
+ break;
+ case 'd':
+ multidist = 1;
+ break;
+ case 'W':
+ tuplesize = myatoi( *++argv );
+ --argc;
+ goto nextoption;
+#if 0
+ case 'd':
+ disp = 1;
+ break;
+#endif
+/* Modified 01/08/27, default: user tree */
+ case 'J':
+ tbutree = 0;
+ break;
+/* modification end. */
+ case 'z':
+ fftThreshold = myatoi( *++argv );
+ --argc;
+ goto nextoption;
+ case 'w':
+ fftWinSize = myatoi( *++argv );
+ --argc;
+ goto nextoption;
+#if 0
+ case 'Z':
+ checkC = 1;
+ break;
+#endif
+ case 'L':
+ legacygapcost = 1;
+ break;
+ case 'Y':
+ keeplength = 1;
+ break;
+ case 'Z':
+ mapout = 1;
+ break;
+ case ':':
+ nwildcard = 1;
+ break;
+ default:
+ fprintf( stderr, "illegal option %c\n", c );
+// argc = 0 stops the outer loop; argc is then negative after its next
+// decrement, so the argc != 0 test below exits
+ argc = 0;
+ break;
+ }
+ }
+ nextoption:
+ ;
+ }
+// one remaining non-option argument is taken as the value of cut
+ if( argc == 1 )
+ {
+ cut = atof( (*argv) );
+ argc--;
+ }
+ if( argc != 0 )
+ {
+ fprintf( stderr, "options: Check source file !\n" );
+ exit( 1 );
+ }
+ if( tbitr == 1 && outgap == 0 )
+ {
+ fprintf( stderr, "conflicting options : o, m or u\n" );
+ exit( 1 );
+ }
+ if( alg == 'C' && outgap == 0 )
+ {
+ fprintf( stderr, "conflicting options : C, o\n" );
+ exit( 1 );
+ }
+}
+
+
+static double treebase( int nseq, int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo )
+{
+
+ int i, l, m;
+ int len1nocommongap, len2nocommongap;
+ int len1, len2;
+ int clus1, clus2;
+ double pscore, tscore;
+ char *indication1, *indication2;
+ double *effarr1 = NULL;
+ double *effarr2 = NULL;
+ double *effarr1_kozo = NULL;
+ double *effarr2_kozo = NULL;
+ LocalHom ***localhomshrink = NULL;
+ int *fftlog;
+ int m1, m2;
+ int *gaplen;
+ int *gapmap;
+ int *alreadyaligned;
+// double dumfl = 0.0;
+ double dumdb = 0.0;
+ int ffttry;
+ RNApair ***grouprna1, ***grouprna2;
+
+ if( rnakozo && rnaprediction == 'm' )
+ {
+ grouprna1 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) );
+ grouprna2 = (RNApair ***)calloc( nseq, sizeof( RNApair ** ) );
+ }
+ else
+ {
+ grouprna1 = grouprna2 = NULL;
+ }
+
+ fftlog = AllocateIntVec( nseq );
+ effarr1 = AllocateDoubleVec( nseq );
+ effarr2 = AllocateDoubleVec( nseq );
+ indication1 = AllocateCharVec( 150 );
+ indication2 = AllocateCharVec( 150 );
+ alreadyaligned = AllocateIntVec( nseq );
+ if( constraint )
+ {
+ localhomshrink = (LocalHom ***)calloc( nseq, sizeof( LocalHom ** ) );
+#if SMALLMEMORY
+ if( multidist )
+ {
+ for( i=0; i<nseq; i++) localhomshrink[i] = (LocalHom **)calloc( 1, sizeof( LocalHom *) );
+ }
+ else
+#endif
+ {
+ for( i=0; i<nseq; i++) localhomshrink[i] = (LocalHom **)calloc( nseq, sizeof( LocalHom *) );
+ }
+ }
+ effarr1_kozo = AllocateDoubleVec( nseq ); //tsuneni allocate sareru.
+ effarr2_kozo = AllocateDoubleVec( nseq ); //tsuneni allocate sareru.
+ for( i=0; i<nseq; i++ ) effarr1_kozo[i] = 0.0;
+ for( i=0; i<nseq; i++ ) effarr2_kozo[i] = 0.0;
+
+ gaplen = AllocateIntVec( *alloclen+10 ); // maikai shokika
+ gapmap = AllocateIntVec( *alloclen+10 ); // maikai shokika
+ for( i=0; i<nseq-1; i++ ) alreadyaligned[i] = 1;
+ alreadyaligned[nseq-1] = 0;
+
+ for( l=0; l<nseq; l++ ) fftlog[l] = 1;
+
+
+ if( constraint )
+ {
+#if SMALLMEMORY
+ if( multidist )
+ dontcalcimportance_firstone( nseq, effarr, aseq, localhomtable );
+ else
+ calcimportance( nseq, effarr, aseq, localhomtable );
+#else
+ calcimportance( nseq, effarr, aseq, localhomtable );
+#endif
+ }
+
+ tscore = 0.0;
+ for( l=0; l<nseq-1; l++ )
+ {
+ if( mergeoralign[l] == 'n' )
+ {
+// fprintf( stderr, "SKIP!\n" );
+#if 0
+ free( topol[l][0] );
+ free( topol[l][1] );
+ free( topol[l] );
+#endif
+ continue;
+ }
+
+ m1 = topol[l][0][0];
+ m2 = topol[l][1][0];
+ len1 = strlen( aseq[m1] );
+ len2 = strlen( aseq[m2] );
+ if( *alloclen < len1 + len2 )
+ {
+#if 0
+ fprintf( stderr, "\nReallocating.." );
+ *alloclen = ( len1 + len2 ) + 1000;
+ ReallocateCharMtx( aseq, nseq, *alloclen + 10 );
+ gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) );
+ if( gaplen == NULL )
+ {
+ fprintf( stderr, "Cannot realloc gaplen\n" );
+ exit( 1 );
+ }
+ gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) );
+ if( gapmap == NULL )
+ {
+ fprintf( stderr, "Cannot realloc gapmap\n" );
+ exit( 1 );
+ }
+ fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
+#else
+ fprintf( stderr, "Length over!\n" );
+ exit( 1 );
+#endif
+ }
+
+ if( effarr_kozo )
+ {
+ clus1 = fastconjuction_noname_kozo( topol[l][0], aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
+ clus2 = fastconjuction_noname_kozo( topol[l][1], aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
+ }
+ else
+ {
+ clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, 0.0 );
+ clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, 0.0 );
+ }
+
+ if( mergeoralign[l] == '1' || mergeoralign[l] == '2' )
+ {
+ newgapstr = "=";
+ }
+ else
+ newgapstr = "-";
+
+
+ len1nocommongap = len1;
+ len2nocommongap = len2;
+ if( mergeoralign[l] == '1' ) // nai
+ {
+ findcommongaps( clus2, mseq2, gapmap );
+ commongappick( clus2, mseq2 );
+ len2nocommongap = strlen( mseq2[0] );
+ }
+ else if( mergeoralign[l] == '2' )
+ {
+ findcommongaps( clus1, mseq1, gapmap );
+ commongappick( clus1, mseq1 );
+ len1nocommongap = strlen( mseq1[0] );
+ }
+
+
+// fprintf( trap_g, "\nSTEP-%d\n", l );
+// fprintf( trap_g, "group1 = %s\n", indication1 );
+// fprintf( trap_g, "group2 = %s\n", indication2 );
+//
+#if 1
+// fprintf( stderr, "\rSTEP % 5d /%d ", l+1, nseq-1 );
+// fflush( stderr );
+#else
+ fprintf( stdout, "STEP %d /%d\n", l+1, nseq-1 );
+ fprintf( stderr, "STEP %d /%d\n", l+1, nseq-1 );
+ fprintf( stderr, "group1 = %.66s", indication1 );
+ if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "group2 = %.66s", indication2 );
+ if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
+ fprintf( stderr, "\n" );
+#endif
+
+
+
+// for( i=0; i<clus1; i++ ) fprintf( stderr, "## STEP%d-eff for mseq1-%d %f\n", l+1, i, effarr1[i] );
+
+ if( constraint )
+ {
+#if SMALLMEMORY
+ if( multidist )
+ {
+ fastshrinklocalhom_one( topol[l][0], topol[l][1], nseq-1, localhomtable, localhomshrink );
+ }
+ else
+#endif
+ {
+ fastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
+ }
+
+// msfastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
+// fprintf( stdout, "localhomshrink =\n" );
+// outlocalhompt( localhomshrink, clus1, clus2 );
+// weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink );
+// fprintf( stderr, "after weight =\n" );
+// outlocalhompt( localhomshrink, clus1, clus2 );
+ }
+ if( rnakozo && rnaprediction == 'm' )
+ {
+ makegrouprna( grouprna1, singlerna, topol[l][0] );
+ makegrouprna( grouprna2, singlerna, topol[l][1] );
+ }
+
+
+/*
+ fprintf( stderr, "before align all\n" );
+ display( aseq, nseq );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "before align 1 %s \n", indication1 );
+ display( mseq1, clus1 );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "before align 2 %s \n", indication2 );
+ display( mseq2, clus2 );
+ fprintf( stderr, "\n" );
+*/
+
+
+ if( !nevermemsave && ( constraint != 2 && alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
+ {
+ fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 );
+ alg = 'M';
+ if( commonIP ) FreeIntMtx( commonIP );
+ commonIP = NULL; // 2013/Jul17
+ commonAlloc1 = 0;
+ commonAlloc2 = 0;
+ }
+
+
+// if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
+ if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 );
+ else ffttry = 0;
+// ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708
+// fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 );
+// fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] );
+ if( constraint == 2 )
+ {
+ if( alg == 'M' )
+ {
+ fprintf( stderr, "\n\nMemory saving mode is not supported.\n\n" );
+ exit( 1 );
+ }
+ fprintf( stderr, "c" );
+ if( alg == 'A' )
+ {
+ imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, NULL, 1, topol[l][0], topol[l][1] );
+ if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
+ pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1 );
+ }
+ else if( alg == 'Q' )
+ {
+ fprintf( stderr, "Q has been disabled.\n" );
+ exit( 1 );
+ }
+ }
+ else if( force_fft || ( use_fft && ffttry ) )
+ {
+ fprintf( stderr, "f" );
+ if( alg == 'M' )
+ {
+ fprintf( stderr, "m" );
+ pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
+ }
+ else
+ pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+ }
+ else
+ {
+ fprintf( stderr, "d" );
+ fftlog[m1] = 0;
+ switch( alg )
+ {
+ case( 'a' ):
+ pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
+ break;
+ case( 'M' ):
+ fprintf( stderr, "m" );
+ pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ break;
+ case( 'A' ):
+ pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1 );
+ break;
+ default:
+ ErrorExit( "ERROR IN SOURCE FILE" );
+ }
+ }
+
+
+ nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
+
+// fprintf( stderr, "aseq[last] = %s\n", aseq[nseq-1] );
+
+#if SCOREOUT
+ fprintf( stderr, "score = %10.2f\n", pscore );
+#endif
+ tscore += pscore;
+#if 0 // New gaps = '='
+ fprintf( stderr, "Original msa\n" );
+ for( i=0; i<clus1; i++ )
+ fprintf( stderr, "%s\n", mseq1[i] );
+ fprintf( stderr, "Query\n" );
+ for( i=0; i<clus2; i++ )
+ fprintf( stderr, "%s\n", mseq2[i] );
+#endif
+
+// writePre( nseq, name, nlen, aseq, 0 );
+
+ if( disp ) display( aseq, nseq );
+
+ if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara.
+ {
+// if( deleteadditionalinsertions ) ndeleted += deletenewinsertions( clus2, clus1, mseq2, mseq1, deleterecord );
+ adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] );
+ restorecommongaps( nseq, nseq-(clus1+clus2), aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
+ findnewgaps( clus2, 0, mseq2, gaplen );
+ insertnewgaps( nseq, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg, '-' );
+// for( i=0; i<nseq; i++ ) eq2dash( aseq[i] );
+ for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1;
+ }
+ if( mergeoralign[l] == '2' )
+ {
+// if( deleteadditionalinsertions ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, deleterecord );
+ adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] );
+ restorecommongaps( nseq, nseq-(clus1+clus2), aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
+ findnewgaps( clus1, 0, mseq1, gaplen );
+ insertnewgaps( nseq, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' );
+// for( i=0; i<nseq; i++ ) eq2dash( aseq[i] );
+ for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1;
+ }
+
+#if 0
+ free( topol[l][0] );
+ free( topol[l][1] );
+ free( topol[l] );
+#endif
+ }
+
+//for( i=0; i<nseq; nseq++ )
+//reporterr( "In treebase() before deletenewinsertions, %s\n", aseq[i] );
+// if( keeplength ) ndeleted += deletenewinsertions_withoutusingequal( nseq-1, 1, 0, aseq, aseq+nseq-1, NULL, deletemapiadd, deletelagiadd, deletelistiadd );
+
+#if SCOREOUT
+ fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
+#endif
+ free( gaplen );
+ free( gapmap );
+ if( rnakozo && rnaprediction == 'm' )
+ {
+ free( grouprna1 );
+ free( grouprna2 );
+ }
+ free( fftlog ); // iranai
+ free( effarr1 );
+ free( effarr2 );
+ free( indication1 );
+ free( indication2 );
+ free( alreadyaligned );
+ if( constraint )
+ {
+ for( i=0; i<nseq; i++ ) free( localhomshrink[i] ); // ??
+ free( localhomshrink );
+ }
+ free( effarr1_kozo );
+ free( effarr2_kozo );
+
+
+ return( pscore );
+}
+
+
+
+
+/* mtxcpy: obtain a new half matrix from AllocateFloatHalfMtx( njobc ), store it in *iscorec,
+ * and copy into it the pairwise values held in iscore for the first norg sequences.
+ * Row i stores the value for pair (i,j), j>i, at offset j-i, so offset 0 is never touched. */
+static void mtxcpy( int norg, int njobc, double ***iscorec, double **iscore )
+{
+    int row, off;
+    double **dst = AllocateFloatHalfMtx( njobc );
+
+    *iscorec = dst;
+    for( row=0; row<norg-1; row++ )
+    {
+        double *src = iscore[row];
+        double *out = dst[row];
+
+        /* offsets 1 .. norg-row-1 <=> partners row+1 .. norg-1 */
+        for( off=1; off<norg-row; off++ ) out[off] = src[off];
+    }
+}
+/* addsinglethread: worker (pthread-style signature) that handles the sequences to be added, one per loop pass.
+ * Unless noalign is set, pass iadd records follows[iadd], istherenewgap[iadd], newgaplist[iadd] and overwrites seq[norg+iadd]. Always returns NULL. */
+static void *addsinglethread( void *arg )
+ {
+ thread_arg_t *targ = (thread_arg_t *)arg;
+ int *nlenc = NULL;
+ char **namec = NULL;
+ Treedep *depc = NULL; // allocated and freed below, not otherwise used in this function
+ char **mseq1 = NULL, **mseq2 = NULL;
+ double **iscorec;
+// double **iscorecbk; // to speedup
+ double *effc = NULL;
+ int ***topolc = NULL;
+ double **lenc = NULL;
+ LocalHom **localhomtablec = NULL;
+ int *memlist0 = NULL; // filled and freed below, not passed to anything in this function
+ int *memlist1 = NULL; // allocated and freed below, not otherwise used in this function
+ int *addmem = NULL;
+ int njobc, norg;
+ char **bseq = NULL;
+ int i, j, k, m, iadd, rep, neighbor;
+ char *mergeoralign = NULL;
+ int *nogaplenjusttodecideaddhereornot = NULL;
+ char *tmpseq = NULL;
+
+#ifdef enablemultithread
+ int thread_no = targ->thread_no;
+ int *iaddshare = targ->iaddshare;
+#endif
+ int njob = targ->njob;
+ int *follows = targ->follows;
+ int nadd = targ->nadd;
+ int *nlen = targ->nlen;
+ char **name = targ->name;
+ char **seq = targ->seq;
+ LocalHom **localhomtable = targ->localhomtable;
+ double **iscore = targ->iscore;
+ double **nscore = targ->nscore;
+ int *istherenewgap = targ->istherenewgap;
+ int **newgaplist = targ->newgaplist;
+ RNApair ***singlerna = targ->singlerna;
+ double *eff_kozo_mapped = targ->eff_kozo_mapped;
+ int alloclen = targ->alloclen;
+ Treedep *dep = targ->dep;
+ int ***topol = targ->topol;
+ double **len = targ->len;
+ Addtree *addtree = targ->addtree;
+ int **deletelist = targ->deletelist;
+ double pscore;
+ int *alnleninnode = NULL;
+ char *targetseq;
+
+// Layout of seq[]: indices 0..norg-1 are copied into bseq as the base set; seq[norg+iadd] is the sequence being added.
+
+// fprintf( stderr, "\nPreparing thread %d\n", thread_no );
+ norg = njob - nadd; // number of sequences that are not being added
+ njobc = norg+1; // working set: the norg base sequences plus one added sequence
+
+ alnleninnode = AllocateIntVec( norg );
+ addmem = AllocateIntVec( nadd+1 );
+ depc = (Treedep *)calloc( njobc, sizeof( Treedep ) );
+ mseq1 = AllocateCharMtx( njob, 0 );
+ mseq2 = AllocateCharMtx( njob, 0 );
+ bseq = AllocateCharMtx( njobc, alloclen );
+ namec = AllocateCharMtx( njob, 0 );
+ nlenc = AllocateIntVec( njob );
+ mergeoralign = AllocateCharVec( njob );
+ nogaplenjusttodecideaddhereornot = AllocateIntVec( njobc );
+ tmpseq = calloc( alloclen, sizeof( char ) );
+
+ if( allowlongadds ) // not really needed.
+ {
+ for( i=0; i<njobc; i++ ) nogaplenjusttodecideaddhereornot[i] = 0;
+ }
+ else
+ {
+ for( i=0; i<norg; i++ )
+ {
+ gappick0( tmpseq, seq[i] );
+ nogaplenjusttodecideaddhereornot[i] = strlen( tmpseq );
+ }
+ }
+
+ for( i=0; i<norg; i++ ) strcpy( bseq[i], seq[i] );
+ if( norg == 1 )
+ {
+ alnleninnode[0] = strlen( bseq[0] );
+ }
+ else
+ {
+ for( i=norg-2; i>=0; i-- ) // per tree step: length of the member sequences after commongappick()
+// for( i=norg-2; i; i-- ) // BUG!!!!
+ {
+// reporterr( "\nstep %d\n", i );
+ k = 0;
+ for( j=0; (m=topol[i][0][j])!=-1; j++ )
+ {
+ mseq1[k++] = bseq[m];
+// reporterr( "%d ", m );
+ }
+ for( j=0; (m=topol[i][1][j])!=-1; j++ )
+ {
+ mseq1[k++] = bseq[m];
+// reporterr( "%d ", m );
+ }
+// reporterr( "\n" );
+ commongappick( k, mseq1 ); // operates on bseq rows through the mseq1 pointers; bseq is re-copied from seq in the main loop
+ alnleninnode[i] = strlen( mseq1[0] );
+// fprintf( stderr, "alnleninnode[%d] = %d\n", i, alnleninnode[i] );
+ }
+ }
+// for( i=0; i<norg-1; i++ )
+// fprintf( stderr, "alnleninnode[%d] = %d\n", i, alnleninnode[i] );
+
+
+ if( constraint )
+ {
+ localhomtablec = (LocalHom **)calloc( njobc, sizeof( LocalHom *) ); // could be made smaller.
+#if SMALLMEMORY
+ if( multidist )
+ {
+ for( i=0; i<njobc; i++) localhomtablec[i] = (LocalHom *)calloc( 1, sizeof( LocalHom ) ); // could be made smaller.
+ }
+ else
+#endif
+ {
+ for( i=0; i<njobc; i++) localhomtablec[i] = (LocalHom *)calloc( njobc, sizeof( LocalHom ) ); // could be made smaller.
+ for( i=0; i<norg; i++ ) for( j=0; j<norg; j++ ) localhomtablec[i][j] = localhomtable[i][j]; // needed!
+ }
+ }
+
+
+ topolc = AllocateIntCub( njobc, 2, 0 );
+ lenc = AllocateFloatMtx( njobc, 2 );
+ effc = AllocateDoubleVec( njobc );
+// for( i=0; i<norg; i++ ) nlenc[i] = strlen( seq[i] );
+ for( i=0; i<norg; i++ ) nlenc[i] = nlen[i];
+ for( i=0; i<norg; i++ ) namec[i] = name[i];
+ memlist0 = AllocateIntVec( norg+1 );
+ memlist1 = AllocateIntVec( 2 );
+ for( i=0; i<norg; i++ ) memlist0[i] = i;
+ memlist0[norg] = -1;
+
+// fprintf( stderr, "\ndone. %d\n", thread_no );
+
+// mtxcpy( norg, norg, &iscorecbk, iscore ); // to speedup?
+
+// Main loop: each pass takes the next index iadd (0..nadd-1) and processes seq[norg+iadd].
+ iadd = -1;
+ while( 1 )
+ {
+#ifdef enablemultithread
+ if( nthread )
+ {
+ pthread_mutex_lock( targ->mutex_counter ); // the shared counter *iaddshare is read and advanced under this lock
+ iadd = *iaddshare;
+ if( iadd == nadd )
+ {
+ pthread_mutex_unlock( targ->mutex_counter );
+ break;
+ }
+ fprintf( stderr, "\r%d / %d (thread %d) \r", iadd, nadd, thread_no );
+ ++(*iaddshare);
+ targetseq = seq[norg+iadd];
+ pthread_mutex_unlock( targ->mutex_counter );
+ }
+ else
+#endif
+ {
+ iadd++;
+ if( iadd == nadd ) break;
+ targetseq = seq[norg+iadd];
+ fprintf( stderr, "\r%d / %d \r", iadd, nadd );
+ }
+
+ for( i=0; i<norg; i++ ) strcpy( bseq[i], seq[i] ); // start every pass from the unmodified base sequences
+// gappick0( bseq[norg], seq[norg+iadd] );
+ gappick0( bseq[norg], targetseq );
+
+ if( allowlongadds ) // missed in v7.220
+ nogaplenjusttodecideaddhereornot[norg] = 0;
+ else
+ nogaplenjusttodecideaddhereornot[norg] = strlen( bseq[norg] );
+
+ mtxcpy( norg, njobc, &iscorec, iscore ); // per-pass copy of the base distances; freed right after addonetip()
+
+ if( multidist || tuplesize > 0 ) // last column: distance of each base sequence to the added one
+ {
+ for( i=0; i<norg; i++ ) iscorec[i][norg-i] = nscore[i][iadd];
+ }
+ else
+ {
+ for( i=0; i<norg; i++ ) iscorec[i][norg-i] = iscore[i][norg+iadd-i];
+ }
+
+
+#if 0
+ for( i=0; i<njobc-1; i++ )
+ {
+ fprintf( stderr, "i=%d\n", i );
+ for( j=i+1; j<njobc; j++ )
+ {
+ fprintf( stderr, "%d-%d, %f\n", i, j, iscorec[i][j-i] );
+ }
+ }
+#endif
+ nlenc[norg] = nlen[norg+iadd];
+ namec[norg] = name[norg+iadd];
+ if( constraint)
+ {
+ for( i=0; i<norg; i++ )
+ {
+#if SMALLMEMORY
+ if( multidist )
+ {
+ localhomtablec[i][0] = localhomtable[i][iadd];
+// localhomtablec[norg][i] = localhomtable[norg+iadd][i];
+ }
+ else
+#endif
+ {
+ localhomtablec[i][norg] = localhomtable[i][norg+iadd];
+ localhomtablec[norg][i] = localhomtable[norg+iadd][i];
+ }
+ }
+// localhomtablec[norg][norg] = localhomtable[norg+iadd][norg+iadd]; // not needed!!
+ }
+
+// fprintf( stderr, "Constructing a UPGMA tree %d ... ", iadd );
+// fflush( stderr );
+
+
+// if( iadd == 0 )
+// {
+// }
+// fixed_musclesupg_double_realloc_nobk_halfmtx( njobc, iscorec, topolc, lenc, depc, 0, 1 );
+ neighbor = addonetip( njobc, topolc, lenc, iscorec, topol, len, dep, treeout, addtree, iadd, name, alnleninnode, nogaplenjusttodecideaddhereornot, noalign );
+
+
+ FreeFloatHalfMtx( iscorec, njobc );
+
+
+ if( tbrweight )
+ {
+ weight = 3; // NOTE(review): weight is not a local; it is written here without holding mutex_counter -- confirm this is safe with nthread > 0
+ counteff_simple_double_nostatic( njobc, topolc, lenc, effc );
+ }
+ else
+ {
+ for( i=0; i<njobc; i++ ) effc[i] = 1.0;
+ }
+
+// FreeFloatMtx( lenc );
+
+ if( noalign ) // the weights are still computed above, just in case.
+ {
+// FreeFloatHalfMtx( iscorec, njobc ); // re-enable if the continue is moved earlier.
+ continue;
+ }
+
+// reporterr( "iadd = %d\n", iadd );
+
+#if 0
+ for( i=0; i<njobc-1; i++ )
+ {
+ fprintf( stderr, "\n step %d\n", i );
+ fprintf( stderr, "topol[%d] = \n", i );
+ for( j=0; topolc[i][0][j]!=-1; j++ ) fprintf( stderr, "%d ", topolc[i][0][j] );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "len=%f\n", lenc [i][0] );
+ for( j=0; topolc[i][1][j]!=-1; j++ ) fprintf( stderr, "%d ", topolc[i][1][j] );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "len=%f\n", lenc [i][1] );
+ }
+
+ fprintf( stderr, "\nneighbor = %d, iadd = %d\n", neighbor, iadd );
+#endif
+ follows[iadd] = neighbor;
+
+ for( i=0; i<njobc-1; i++ ) mergeoralign[i] = 'n'; // 'n' unless set to '1', '2' or 'w' below
+ for( j=njobc-1; j<njobc; j++ )
+ {
+ addmem[0] = j;
+ addmem[1] = -1;
+ for( i=0; i<njobc-1; i++ )
+ {
+ if( samemembern( topolc[i][0], addmem, 1 ) ) // can happen
+ {
+// fprintf( stderr, "HIT!\n" );
+ if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+ else mergeoralign[i] = '1';
+ }
+ else if( samemembern( topolc[i][1], addmem, 1 ) )
+ {
+// fprintf( stderr, "HIT!\n" );
+ if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+ else mergeoralign[i] = '2';
+ }
+ }
+ }
+
+// for( i=0; i<1; i++ ) addmem[i] = njobc-1+i;
+ addmem[0] = njobc-1;
+ addmem[1] = -1;
+ for( i=0; i<njobc-1; i++ ) // '1' / '2': the added sequence is in group 0 / group 1 of step i; 'w': in both
+ {
+ if( includemember( topolc[i][0], addmem ) && includemember( topolc[i][1], addmem ) )
+ {
+ mergeoralign[i] = 'w';
+ }
+ else if( includemember( topolc[i][0], addmem ) )
+ {
+ mergeoralign[i] = '1';
+// fprintf( stderr, "HIT 1! iadd=%d", iadd );
+ }
+ else if( includemember( topolc[i][1], addmem ) )
+ {
+ mergeoralign[i] = '2';
+// fprintf( stderr, "HIT 2! iadd=%d", iadd );
+ }
+ }
+#if 0
+ for( i=0; i<njob-1; i++ )
+ {
+ fprintf( stderr, "mem0 = " );
+ for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][0][j] );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "mem1 = " );
+ for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][1][j] );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
+ }
+#endif
+
+
+#if 0
+ for( i=0; i<norg; i++ ) fprintf( stderr, "seq[%d, iadd=%d] = \n%s\n", i, iadd, seq[i] );
+ fprintf( stderr, "gapmapS (iadd=%d) = \n", iadd );
+ for( i=0; i<lennocommongap; i++ ) fprintf( stderr, "%d\n", gapmapS[i] );
+#endif
+
+
+// fprintf( stderr, "Progressive alignment ... \r" );
+
+#if 0
+ pthread_mutex_lock( targ->mutex_counter );
+ fprintf( stdout, "\nmergeoralign (iadd=%d) = ", iadd );
+ for( i=0; i<njobc-1; i++ ) fprintf( stdout, "%c", mergeoralign[i] );
+ fprintf( stdout, "\n" );
+ pthread_mutex_unlock( targ->mutex_counter );
+#endif
+
+ singlerna = NULL; // the value taken from targ is discarded: treebase() always receives NULL here
+ pscore = treebase( njobc, nlenc, bseq, 1, mergeoralign, mseq1, mseq2, topolc, effc, &alloclen, localhomtablec, singlerna, eff_kozo_mapped );
+#if 0
+ pthread_mutex_lock( targ->mutex_counter );
+// fprintf( stdout, "res (iadd=%d) = %s, pscore=%f\n", iadd, bseq[norg], pscore );
+// fprintf( stdout, "effc (iadd=%d) = ", iadd );
+// for( i=0; i<njobc; i++ ) fprintf( stdout, "%f ", effc[i] );
+// fprintf( stdout, "\n" );
+ pthread_mutex_unlock( targ->mutex_counter );
+#endif
+
+
+#if 0
+ fprintf( trap_g, "done.\n" );
+ fclose( trap_g );
+#endif
+// fprintf( stdout, "\n>seq[%d, iadd=%d] = \n%s\n", norg+iadd, iadd, seq[norg+iadd] );
+// fprintf( stdout, "\n>bseq[%d, iadd=%d] = \n%s\n", norg, iadd, bseq[norg] );
+
+// strcpy( seq[norg+iadd], bseq[norg] );
+
+
+ if( keeplength )
+ {
+// reporterr( "deletelist = %p\n", deletelist );
+// reporterr( "deletelist+iadd = %p\n", deletelist+iadd );
+ ndeleted += deletenewinsertions_whole_eq( norg, 1, bseq, bseq+norg, deletelist+iadd ); // NOTE(review): ndeleted is not a local and is updated without a lock -- confirm with nthread > 0
+// for( i=0; i<norg+1; i++ ) reporterr( ">\n%s\n", bseq[i] );
+ strcpy( targetseq, bseq[norg] );
+ i = norg; // no new gap!!
+ }
+ else
+ {
+ strcpy( targetseq, bseq[norg] );
+ rep = -1;
+ for( i=0; i<norg; i++ ) // look for the first base sequence containing '='
+ {
+// fprintf( stderr, "Checking %d/%d\n", i, norg );
+ if( strchr( bseq[i], '=' ) ) break;
+ }
+ }
+
+ if( i == norg ) // no base sequence contains '=' (or keeplength forced i = norg)
+ istherenewgap[iadd] = 0;
+ else
+ {
+ rep = i;
+ istherenewgap[iadd] = 1;
+
+
+ makenewgaplist( newgaplist[iadd], bseq[rep] );
+// for( i=0; newgaplist[iadd][i]!=-1; i++ ) fprintf( stderr, "%d: %d\n", i, newgaplist[iadd][i] );
+ }
+ eq2dash( targetseq );
+
+ }
+
+// Cleanup of everything allocated by this worker.
+#if 1
+ if( constraint && localhomtablec )
+ {
+ for( i=0; i<njobc; i++ ) free( localhomtablec[i] );
+ free( localhomtablec );
+ localhomtablec = NULL;
+ }
+ if( mergeoralign ) free( mergeoralign ); mergeoralign = NULL;
+ if( nogaplenjusttodecideaddhereornot ) free( nogaplenjusttodecideaddhereornot ); nogaplenjusttodecideaddhereornot = NULL;
+ if( alnleninnode ) free( alnleninnode ); alnleninnode = NULL;
+ if( tmpseq ) free( tmpseq ); tmpseq = NULL;
+ if( bseq ) FreeCharMtx( bseq ); bseq = NULL;
+ if( namec ) free( namec ); namec = NULL;
+ if( nlenc ) free( nlenc ); nlenc = NULL;
+ if( depc ) free( depc ); depc = NULL;
+ if( topolc ) FreeIntCub( topolc ); topolc = NULL;
+ if( lenc ) FreeFloatMtx( lenc ); lenc = NULL;
+ if( effc ) FreeDoubleVec( effc ); effc = NULL;
+ if( memlist0 ) free( memlist0 ); memlist0 = NULL;
+ if( memlist1 ) free( memlist1 ); memlist1 = NULL;
+ if( addmem ) free( addmem ); addmem = NULL;
+ if( mseq1 ) free( mseq1 ); mseq1 = NULL;
+ if( mseq2 ) free( mseq2 ); mseq2 = NULL;
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); // all-NULL call; presumably releases Falign's internal buffers -- TODO confirm
+ A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 ); // same pattern for A__align -- TODO confirm
+ if( commonIP ) FreeIntMtx( commonIP );
+ commonIP = NULL;
+ commonAlloc1 = commonAlloc2 = 0;
+#endif
+// FreeFloatHalfMtx( iscorecbk, norg );
+
+ return( NULL );
+ }
+
+static int nunknown = 0;
+/* seq_grp_nuc: translate seq into nucleotide group codes via amino_grp, writing them to grp.
+ * Codes >= 4 are dropped and counted in nunknown; grp is terminated with END_OF_VEC.
+ * If fewer than tuplesize codes were stored, grp[0] is set to -1 to flag the sequence as too short. */
+void seq_grp_nuc( int *grp, char *seq )
+{
+    int code;
+    int *grpbk = grp;
+
+    for( ; *seq; seq++ )
+    {
+        /* Index through unsigned char: where plain char is signed, a byte >= 0x80 would
+         * otherwise become a negative subscript. Assumes amino_grp covers 0..255 -- TODO confirm. */
+        code = amino_grp[(unsigned char)*seq];
+        if( code < 4 ) *grp++ = code;
+        else nunknown++;
+    }
+    *grp = END_OF_VEC;
+
+    /* Not enough codes for a single tuple: the makepointtable_nuc* routines test grp[0] == -1. */
+    if( grp - grpbk < tuplesize ) *grpbk = -1;
+}
+/* seq_grp: translate seq into amino-acid group codes via amino_grp, writing them to grp.
+ * Codes >= 6 are dropped and counted in nunknown; grp is terminated with END_OF_VEC.
+ * If fewer than 6 codes were stored, grp[0] is set to -1 to flag the sequence as too short. */
+void seq_grp( int *grp, char *seq )
+{
+    int code;
+    int *grpbk = grp;
+
+    for( ; *seq; seq++ )
+    {
+        /* Index through unsigned char: where plain char is signed, a byte >= 0x80 would
+         * otherwise become a negative subscript. Assumes amino_grp covers 0..255 -- TODO confirm. */
+        code = amino_grp[(unsigned char)*seq];
+        if( code < 6 ) *grp++ = code;
+        else nunknown++;
+    }
+    *grp = END_OF_VEC;
+
+    /* Not enough codes for a single 6-tuple: makepointtable tests grp[0] == -1. */
+    if( grp - grpbk < 6 ) *grpbk = -1;
+}
+/* makecompositiontable_p: for every tuple code in pointt (up to END_OF_VEC) increment its counter in table. */
+void makecompositiontable_p( int *table, int *pointt )
+{
+    int *cur;
+
+    for( cur=pointt; *cur!=END_OF_VEC; cur++ )
+        table[*cur] += 1;
+}
+
+/* makepointtable_nuc_dectet: encode every window of 10 consecutive group codes in n
+ * (END_OF_VEC-terminated) as a base-4 integer and store the codes in pointt,
+ * terminated by END_OF_VEC.  If n[0] is -1 (sequence flagged as too short) only -1 is
+ * written to pointt[0].
+ *   pointt  output array
+ *   n       input group codes; the first 10 entries are read unconditionally */
+void makepointtable_nuc_dectet( int *pointt, int *n )
+{
+    int code, k;
+    int *oldest;
+
+    if( *n == -1 )
+    {
+        *pointt = -1;
+        return;
+    }
+
+    /* First window: Horner evaluation of n[0]*4^9 + n[1]*4^8 + ... + n[9]. */
+    oldest = n;
+    code = 0;
+    for( k=0; k<10; k++ )
+        code = code * 4 + *n++;
+    *pointt++ = code;
+
+    /* Slide one position at a time: remove the leading digit (weight 4^9 = 262144),
+     * shift the rest up by one base-4 place and append the incoming digit. */
+    while( *n != END_OF_VEC )
+    {
+        code -= *oldest++ * 262144;
+        code *= 4;
+        code += *n++;
+        *pointt++ = code;
+    }
+    *pointt = END_OF_VEC;
+}
+/* makepointtable_nuc_octet: encode every window of 8 consecutive group codes in n
+ * (END_OF_VEC-terminated) as a base-4 integer and store the codes in pointt, terminated
+ * by END_OF_VEC.  If n[0] is -1 only -1 is written to pointt[0].  The first 8 entries
+ * of n are read unconditionally. */
+void makepointtable_nuc_octet( int *pointt, int *n )
+{
+    int code, k;
+    int *oldest;
+
+    if( *n == -1 )
+    {
+        *pointt = -1;
+        return;
+    }
+
+    /* First window: Horner evaluation of n[0]*4^7 + n[1]*4^6 + ... + n[7]. */
+    oldest = n;
+    code = 0;
+    for( k=0; k<8; k++ )
+        code = code * 4 + *n++;
+    *pointt++ = code;
+
+    /* Slide: drop the leading digit (weight 4^7 = 16384), shift, append the new digit. */
+    while( *n != END_OF_VEC )
+    {
+        code -= *oldest++ * 16384;
+        code *= 4;
+        code += *n++;
+        *pointt++ = code;
+    }
+    *pointt = END_OF_VEC;
+}
+/* makepointtable_nuc: encode every window of 6 consecutive group codes in n (END_OF_VEC-terminated)
+ * as a base-4 integer in pointt (END_OF_VEC-terminated).  n[0] == -1 yields pointt[0] = -1 only. */
+void makepointtable_nuc( int *pointt, int *n )
+{
+    int code, k;
+    int *oldest;
+
+    if( *n == -1 )
+    {
+        *pointt = -1;
+        return;
+    }
+
+    /* First window: Horner evaluation of n[0]*4^5 + n[1]*4^4 + ... + n[5]. */
+    oldest = n;
+    code = 0;
+    for( k=0; k<6; k++ )
+        code = code * 4 + *n++;
+    *pointt++ = code;
+
+    /* Slide: drop the leading digit (weight 4^5 = 1024), shift, append the new digit. */
+    while( *n != END_OF_VEC )
+    {
+        code -= *oldest++ * 1024;
+        code *= 4;
+        code += *n++;
+        *pointt++ = code;
+    }
+    *pointt = END_OF_VEC;
+}
+/* makepointtable: encode every window of 6 consecutive group codes in n (END_OF_VEC-terminated)
+ * as a base-6 integer in pointt (END_OF_VEC-terminated).  n[0] == -1 yields pointt[0] = -1 only. */
+void makepointtable( int *pointt, int *n )
+{
+    int code, k;
+    int *oldest;
+
+    if( *n == -1 )
+    {
+        *pointt = -1;
+        return;
+    }
+
+    /* First window: Horner evaluation of n[0]*6^5 + n[1]*6^4 + ... + n[5]. */
+    oldest = n;
+    code = 0;
+    for( k=0; k<6; k++ )
+        code = code * 6 + *n++;
+    *pointt++ = code;
+
+    /* Slide: drop the leading digit (weight 6^5 = 7776), shift, append the new digit. */
+    while( *n != END_OF_VEC )
+    {
+        code -= *oldest++ * 7776;
+        code *= 6;
+        code += *n++;
+        *pointt++ = code;
+    }
+    *pointt = END_OF_VEC;
+}
+
+#ifdef enablemultithread
+/* dndprethread: worker that fills the half matrix mtx (pair i<j stored at mtx[i][j-i]) with
+ * distances maxdist * ( 1 - naivepairscore11( seq[i], seq[j], penalty*10 ) / min( selfscore[i], selfscore[j] ) ).
+ * Pairs are claimed one at a time from the shared cursor *jobpospt while holding targ->mutex.
+ * Values outside [0.0, 9.9] are clamped after a warning on stderr.  Always returns NULL. */
+void *dndprethread( void *arg )
+{
+    dndprethread_arg_t *targ = (dndprethread_arg_t *)arg;
+    int njob = targ->njob;
+    int thread_no = targ->thread_no;
+    double *selfscore = targ->selfscore;
+    double **mtx = targ->mtx;
+    char **seq = targ->seq;
+    Jobtable2d *jobpospt = targ->jobpospt;
+
+    int row, col;
+    double denom, dist;
+
+    if( njob == 1 ) return( NULL ); /* a single sequence has no pairs */
+
+    for( ;; )
+    {
+        /* advance the shared cursor to the next pair */
+        pthread_mutex_lock( targ->mutex );
+        row = jobpospt->i;
+        col = jobpospt->j + 1;
+        if( col == njob )
+        {
+            /* end of this row: report progress, continue with the first pair of the next row */
+            fprintf( stderr, "%4d/%4d (thread %4d), dndpre\r", row+1, njob, thread_no );
+            row++;
+            col = row + 1;
+            if( row == njob-1 )
+            {
+                /* nothing left; the cursor is not written back, so later callers end up here too */
+                pthread_mutex_unlock( targ->mutex );
+                return( NULL );
+            }
+        }
+        jobpospt->j = col;
+        jobpospt->i = row;
+        pthread_mutex_unlock( targ->mutex );
+
+        denom = MIN( selfscore[row], selfscore[col] );
+        if( denom == 0.0 )
+        {
+            dist = maxdist;
+        }
+        else
+        {
+            dist = maxdist * ( 1.0 - (double)naivepairscore11( seq[row], seq[col], penalty * 10 ) / denom );
+        }
+#if 1
+        if( dist < 0.0 )
+        {
+            fprintf( stderr, "WARNING: distance %d-%d is strange, %f.\n", row, col, dist );
+            dist = 0.0;
+        }
+        else if( dist > 9.9 )
+        {
+            fprintf( stderr, "WARNING: distance %d-%d is strange, %f.\n", row, col, dist );
+            dist = 9.9;
+        }
+#else // CAUTION!!! 2012/05/16
+        if( dist > 2.0 )
+        {
+            dist = 2.0;
+        }
+        if( dist < 0.0 )
+        {
+            fprintf( stderr, "Distance %d-%d is strange, %f.\n", row, col, dist );
+            exit( 1 );
+        }
+#endif
+
+        mtx[row][col-row] = dist;
+    }
+}
+/* gaplist2alnxthread: worker that passes sequences through gaplist2alnx() and copies the result back in place.
+ * Sequence indices are taken one at a time from the shared counter *jobpospt (under targ->mutex)
+ * until it reaches ncycle.  Always returns NULL. */
+static void *gaplist2alnxthread( void *arg )
+{
+    gaplist2alnxthread_arg_t *targ = (gaplist2alnxthread_arg_t *)arg;
+    int ncycle = targ->ncycle;
+    char **seq = targ->seq;
+    int *newgaplist = targ->newgaplist;
+    int *posmap = targ->posmap;
+    int *jobpospt = targ->jobpospt;
+    int tmpseqlen = targ->tmpseqlen;
+    int lenfull = targ->lenfull;
+    char *scratch;
+    int job, finished;
+
+    scratch = AllocateCharVec( tmpseqlen ); /* private output buffer of this worker */
+
+    for( ;; )
+    {
+        /* claim the next index, or find out that none is left */
+        pthread_mutex_lock( targ->mutex );
+        job = *jobpospt;
+        finished = ( job == ncycle );
+        if( !finished ) *jobpospt = job+1;
+        pthread_mutex_unlock( targ->mutex );
+        if( finished ) break;
+
+        /* build the new sequence in scratch, then overwrite the original with it */
+        gaplist2alnx( lenfull, scratch, seq[job], newgaplist, posmap, tmpseqlen );
+        strcpy( seq[job], scratch );
+    }
+    free( scratch );
+    return( NULL );
+}
+/* distancematrixthread: worker computing tuple-composition distances for rows 0..norg-1.
+ * Row indices are claimed from the shared counter *jobpospt under targ->mutex.  For a row i,
+ * every j in i+1..njob-1 is scored: j < norg is stored in imtx[i][j-i], j >= norg in nmtx[i][j-norg].
+ * Always returns NULL. */
+static void *distancematrixthread( void *arg )
+{
+    distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
+    int thread_no = targ->thread_no;
+    int njob = targ->njob;
+    int norg = targ->norg;
+    int *jobpospt = targ->jobpospt;
+    int **pointt = targ->pointt;
+    double **imtx = targ->imtx;
+    double **nmtx = targ->nmtx;
+    double *selfscore = targ->selfscore;
+    int *nogaplen = targ->nogaplen;
+
+    double lenfac, denom, longer, shorter, shared;
+    double *cell;
+    int *counts;
+    int row, col, nomore;
+
+    for( ;; )
+    {
+        /* claim the next row, or find out that all rows are taken */
+        pthread_mutex_lock( targ->mutex );
+        row = *jobpospt;
+        nomore = ( row == norg );
+        if( !nomore ) *jobpospt = row+1;
+        pthread_mutex_unlock( targ->mutex );
+        if( nomore ) break;
+
+        /* tuple-count table of the row sequence, allocated anew for every row */
+        counts = (int *)calloc( tsize, sizeof( int ) );
+        if( !counts ) ErrorExit( "Cannot allocate table1\n" );
+        if( row % 100 == 0 )
+        {
+            fprintf( stderr, "\r% 5d / %d (thread %4d)", row+1, norg, thread_no );
+        }
+        makecompositiontable_p( counts, pointt[row] );
+
+        for( col=row+1; col<njob; col++ )
+        {
+            shared = (double)commonsextet_p( counts, pointt[col] );
+
+            /* order the two nogaplen values */
+            if( nogaplen[row] > nogaplen[col] )
+            {
+                longer = (double)nogaplen[row];
+                shorter = (double)nogaplen[col];
+            }
+            else
+            {
+                longer = (double)nogaplen[col];
+                shorter = (double)nogaplen[row];
+            }
+            /* correction factor built from the two lengths and the lenfac[a-d] parameters */
+            lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
+            denom = MIN( selfscore[row], selfscore[col] );
+
+            /* pairs inside the first norg sequences go to imtx, the others to nmtx */
+            if( col < norg )
+                cell = &imtx[row][col-row];
+            else
+                cell = &nmtx[row][col-norg];
+
+            /* a zero self score cannot be used as divisor: store maxdist instead */
+            if( denom == 0.0 )
+                *cell = maxdist;
+            else
+                *cell = maxdist * ( 1.0 - shared / denom ) * lenfac;
+        }
+        free( counts );
+    }
+
+    /* called with NULL arguments once the worker is done; presumably releases
+     * commonsextet_p's internal buffers -- TODO confirm */
+    commonsextet_p( NULL, NULL );
+    return( NULL );
+}
+#endif
+/* ktupledistancematrix: fill imtx (pairs i<j<norg, stored at imtx[i][j-i]) and nmtx (i<norg<=j<nseq, stored at nmtx[i][j-norg])
+ * with tuple-composition distances.  If hitout is non-zero, a similarity report is printed to stdout and the process exits. */
+void ktupledistancematrix( int nseq, int norg, int nlenmax, char **seq, char **name, double **imtx, double **nmtx )
+{
+ char *tmpseq;
+ int *grpseq;
+ int **pointt;
+ int i, j;
+ int *nogaplen;
+ int *table1;
+ double lenfac, bunbo, longer, shorter, mtxv; // bunbo = denominator
+ double *selfscore;
+ selfscore = AllocateFloatVec( nseq );
+
+ fprintf( stderr, "\n\nMaking a distance matrix ..\n" );
+ fflush( stderr );
+
+ tmpseq = AllocateCharVec( nlenmax+1 );
+ grpseq = AllocateIntVec( nlenmax+1 );
+ pointt = AllocateIntMtx( nseq, nlenmax+1 );
+ nogaplen = AllocateIntVec( nseq );
+
+ if( dorp == 'd' ) tsize = (int)pow( 4, tuplesize ); // size of the tuple-count table: 4^tuplesize for dorp == 'd'
+ else tsize = (int)pow( 6, 6 ); // otherwise 6^6
+
+ if( dorp == 'd' && tuplesize == 6 ) // select the lenfac[a-d] parameters used in the length correction below
+ {
+ lenfaca = D6LENFACA;
+ lenfacb = D6LENFACB;
+ lenfacc = D6LENFACC;
+ lenfacd = D6LENFACD;
+ }
+ else if( dorp == 'd' && tuplesize == 10 )
+ {
+ lenfaca = D10LENFACA;
+ lenfacb = D10LENFACB;
+ lenfacc = D10LENFACC;
+ lenfacd = D10LENFACD;
+ }
+ else
+ {
+ lenfaca = PLENFACA;
+ lenfacb = PLENFACB;
+ lenfacc = PLENFACC;
+ lenfacd = PLENFACD;
+ }
+
+ maxl = 0;
+ for( i=0; i<nseq; i++ ) // per sequence: gappick0() into tmpseq, record its length, build the tuple codes pointt[i]
+ {
+ gappick0( tmpseq, seq[i] );
+ nogaplen[i] = strlen( tmpseq );
+ if( nogaplen[i] < 6 ) // the body is entirely commented out: short sequences are no longer rejected here
+ {
+// fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
+// fprintf( stderr, "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
+// exit( 1 );
+ }
+ if( nogaplen[i] > maxl ) maxl = nogaplen[i];
+ if( dorp == 'd' ) /* nuc */
+ {
+ seq_grp_nuc( grpseq, tmpseq );
+// makepointtable_nuc( pointt[i], grpseq );
+// makepointtable_nuc_octet( pointt[i], grpseq );
+ if( tuplesize == 10 )
+ makepointtable_nuc_dectet( pointt[i], grpseq );
+ else if( tuplesize == 6 )
+ makepointtable_nuc( pointt[i], grpseq );
+ else
+ {
+ fprintf( stderr, "tuplesize=%d: not supported\n", tuplesize );
+ exit( 1 );
+ }
+ }
+ else /* amino */
+ {
+ seq_grp( grpseq, tmpseq );
+ makepointtable( pointt[i], grpseq );
+ }
+
+ }
+ if( nunknown ) fprintf( stderr, "\nThere are %d ambiguous characters\n", nunknown ); // nunknown is a file-level counter that this function never resets
+
+ for( i=0; i<nseq; i++ ) // serial is sufficient
+ {
+ table1 = (int *)calloc( tsize, sizeof( int ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ makecompositiontable_p( table1, pointt[i] );
+
+ selfscore[i] = (double)commonsextet_p( table1, pointt[i] ); // score of the sequence against itself; used as the normaliser below
+ free( table1 );
+ }
+
+#ifdef enablemultithread
+ if( nthread > 0 ) // rows are distributed over nthread distancematrixthread workers sharing the counter jobpos
+ {
+ distancematrixthread_arg_t *targ;
+ int jobpos;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+
+ jobpos = 0;
+ targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+
+ for( i=0; i<nthread; i++ )
+ {
+ targ[i].thread_no = i;
+ targ[i].njob = nseq;
+ targ[i].norg = norg;
+ targ[i].jobpospt = &jobpos;
+ targ[i].pointt = pointt;
+ targ[i].imtx = imtx;
+ targ[i].nmtx = nmtx;
+ targ[i].selfscore = selfscore;
+ targ[i].nogaplen = nogaplen;
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
+ }
+
+ for( i=0; i<nthread; i++ )
+ {
+ pthread_join( handle[i], NULL );
+ }
+ pthread_mutex_destroy( &mutex );
+ free( handle );
+ free( targ );
+
+ }
+ else
+#endif
+ {
+ for( i=0; i<norg; i++ ) // serial version of the same computation
+ {
+ table1 = (int *)calloc( tsize, sizeof( int ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ if( i % 100 == 0 )
+ {
+ fprintf( stderr, "\r% 5d / %d", i+1, norg );
+ fflush( stderr );
+ }
+ makecompositiontable_p( table1, pointt[i] );
+
+ for( j=i+1; j<nseq; j++ )
+ {
+ mtxv = (double)commonsextet_p( table1, pointt[j] );
+ if( nogaplen[i] > nogaplen[j] )
+ {
+ longer=(double)nogaplen[i];
+ shorter=(double)nogaplen[j];
+ }
+ else
+ {
+ longer=(double)nogaplen[j];
+ shorter=(double)nogaplen[i];
+ }
+ lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
+ bunbo = MIN( selfscore[i], selfscore[j] );
+
+ if( j < norg ) // pair inside the first norg sequences -> imtx, otherwise -> nmtx
+ {
+ if( bunbo == 0.0 ) // zero self score: store maxdist instead of dividing
+ imtx[i][j-i] = maxdist;
+ else
+ imtx[i][j-i] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac;
+ }
+ else
+ {
+ if( bunbo == 0.0 )
+ nmtx[i][j-norg] = maxdist;
+ else
+ nmtx[i][j-norg] = maxdist * ( 1.0 - mtxv / bunbo ) * lenfac;
+ }
+ }
+ free( table1 );
+ }
+ }
+
+ fprintf( stderr, "\ndone.\n\n" );
+ fflush( stderr );
+
+ free( grpseq );
+ free( tmpseq );
+ FreeIntMtx( pointt );
+ free( nogaplen );
+ free( selfscore );
+
+// Optional report mode: the sign of hitout selects which side is listed first; both variants end with exit( 1 ).
+ if( hitout<0.0 )
+ {
+ fprintf( stdout, "Threshold=%f\n\n", -hitout );
+ for( i=0; i<norg; i++ ) // for each of the first norg sequences, list the later sequences closer than -hitout
+ {
+ for( j=norg; j<nseq; j++ )
+ {
+ if( nmtx[i][j-norg] < -hitout )
+ break;
+ }
+ if( j<nseq )
+ {
+ fprintf( stdout, "%s may be similar to:\n", name[i]+1 );
+ for( j=norg; j<nseq; j++ )
+ {
+ if( nmtx[i][j-norg] < -hitout )
+ fprintf( stdout, " %s, %f\n", name[j]+1, nmtx[i][j-norg] );
+ }
+ fprintf( stdout, "\n" );
+ }
+ }
+ exit( 1 );
+ }
+ if( hitout>0.0 )
+ {
+ fprintf( stdout, "Threshold=%f\n\n", hitout );
+ for( i=norg; i<nseq; i++ ) // for each later sequence, list those of the first norg that are closer than hitout
+ {
+ for( j=0; j<norg; j++ )
+ {
+ if( nmtx[j][i-norg] < hitout )
+ break;
+ }
+ if( j<norg )
+ {
+ fprintf( stdout, "%s may be similar to:\n", name[i]+1 );
+ for( j=0; j<norg; j++ )
+ {
+ if( nmtx[j][i-norg] < hitout )
+ fprintf( stdout, " %s, %f\n", name[j]+1, nmtx[j][i-norg] );
+ }
+ fprintf( stdout, "\n" );
+ }
+ }
+ exit( 1 );
+ }
+
+#if 0 // TODO: rewrite writehat2
+ if( distout )
+ {
+ hat2p = fopen( "hat2", "w" );
+ WriteFloatHat2_pointer_halfmtx( hat2p, nseq, name, mtx );
+ fclose( hat2p );
+ }
+#endif
+}
+/* dndpre: fill the half matrix mtx (entry for pair i<j at mtx[i][j-i]) with pairwise distances
+ *   maxdist * ( 1 - naivepairscore11( seq[i], seq[j], penalty*10 ) / min( self[i], self[j] ) ),
+ * self[k] = naivepairscore11( seq[k], seq[k], 0 ); maxdist is used when the smaller self score is 0.
+ * Values outside [0.0, 9.9] are clamped with a warning.  With nthread > 0 dndprethread workers are used. */
+void dndpre( int nseq, char **seq, double **mtx ) // not used yet
+{
+    int i, j, ilim;
+    double *selfscore;
+    double mtxv;
+    double ssi, ssj, bunbo;
+
+    selfscore = AllocateFloatVec( nseq );
+    for( i=0; i<nseq; i++ )
+        selfscore[i] = (double)naivepairscore11( seq[i], seq[i], 0 );
+#ifdef enablemultithread
+    if( nthread > 0 )
+    {
+        dndprethread_arg_t *targ;
+        Jobtable2d jobpos;
+        pthread_t *handle;
+        pthread_mutex_t mutex;
+
+        jobpos.i = 0;
+        jobpos.j = 0;
+        targ = calloc( nthread, sizeof( dndprethread_arg_t ) );
+        handle = calloc( nthread, sizeof( pthread_t ) );
+        if( !targ || !handle ) ErrorExit( "Cannot allocate thread data in dndpre\n" );
+        pthread_mutex_init( &mutex, NULL );
+
+        for( i=0; i<nthread; i++ )
+        {
+            targ[i].thread_no = i;
+            targ[i].njob = nseq;
+            targ[i].selfscore = selfscore;
+            targ[i].mtx = mtx;
+            targ[i].seq = seq;
+            targ[i].jobpospt = &jobpos;
+            targ[i].mutex = &mutex;
+            pthread_create( handle+i, NULL, dndprethread, (void *)(targ+i) );
+        }
+
+        for( i=0; i<nthread; i++ )
+        {
+            pthread_join( handle[i], NULL );
+        }
+        pthread_mutex_destroy( &mutex );
+        /* Release the per-call thread bookkeeping; without this both arrays leak on every call. */
+        free( handle );
+        free( targ );
+    }
+    else
+#endif
+    {
+        ilim = nseq-1;
+        for( i=0; i<ilim; i++ )
+        {
+            ssi = selfscore[i];
+            fprintf( stderr, "%4d/%4d\r", i+1, nseq );
+
+            for( j=i+1; j<nseq; j++ )
+            {
+                ssj = selfscore[j];
+                bunbo = MIN( ssi, ssj ); /* bunbo = denominator */
+                if( bunbo == 0.0 )
+                    mtxv = maxdist;
+                else
+                    mtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty * 10 ) / bunbo );
+
+#if 1
+                if( mtxv < 0.0 )
+                {
+                    fprintf( stderr, "WARNING: distance %d-%d is strange, %f.\n", i, j, mtxv );
+                    mtxv = 0.0;
+//                  exit( 1 ); // 2016/Aug/3
+                }
+                if( mtxv > 9.9 )
+                {
+                    fprintf( stderr, "WARNING: distance %d-%d is strange, %f.\n", i, j, mtxv );
+                    mtxv = 9.9;
+//                  exit( 1 ); // 2016/Aug/3
+                }
+#else // CAUTION!!! 2012/05/16
+                if( mtxv > 2.0 )
+                {
+                    mtxv = 2.0;
+                }
+                if( mtxv < 0.0 )
+                {
+                    fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
+                    exit( 1 );
+                }
+#endif
+                mtx[i][j-i] = mtxv;
+            }
+        }
+    }
+
+#if TEST
+    for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) /* half matrix: pair (i,j) lives at mtx[i][j-i] */
+        fprintf( stdout, "i=%d, j=%d, mtx[][] = %f\n", i, j, mtx[i][j-i] );
+#endif
+    free( selfscore );
+}
+
+/*
+ * searchlet: scan the inclusive range [p1, p2] and return the offset from p1
+ * of the first character that is not a gap ('-').  Returns -1 when every
+ * character in the range is a gap (or when the range is empty, p2 < p1).
+ */
+static int searchlet( char *p1, char *p2 )
+{
+	int offset = 0;
+
+	while( p1 + offset <= p2 )
+	{
+		if( p1[offset] != '-' )
+			return( offset );
+		offset++;
+	}
+	return( -1 );
+}
+
+/*
+ * smoothing2: merge realign blocks that are separated by exactly one column
+ * without new residues.
+ *
+ * For every i in [2, lenfull] where realign[i-2] and realign[i] both have
+ * new residues and realign[i-1] has none, block i-2 is folded into block i:
+ *   - realign[i].nnewres absorbs realign[i-2].nnewres and realign[i].start is
+ *     moved to realign[i-2].start+2; realign[i-2] is reset to zero.
+ *   - in each of the first norg (= njob-nadd) sequences the two characters at
+ *     postoshiftfrom and postoshiftfrom+1 are moved to postoshiftto and
+ *     postoshiftto+1, which must currently hold gaps.
+ *   - in each remaining (added) sequence, if postoshiftto (resp.
+ *     postoshiftto+1) holds a gap, the first non-gap character found by
+ *     searchlet() in the window up to postoshiftfrom (resp. postoshiftfrom+1)
+ *     is moved into it.
+ * Any layout that violates these expectations is reported and the program
+ * exits with status 1.
+ */
+static void smoothing2( int njob, int nadd, int lenfull, char **seq, Blocktorealign *realign )
+{
+ int i, j, norg = njob-nadd;
+
+ reporterr( "Smoothing2\n" );
+ for( i=2; i<lenfull+1; i++ )
+ {
+ int postoshiftfrom;
+ int shiftonadd;
+ int postoshiftto;
+ if( realign[i-2].nnewres && realign[i-1].nnewres == 0 && realign[i].nnewres )
+ {
+ postoshiftto = realign[i-2].start;
+ postoshiftfrom = realign[i-2].end + 1;
+ if( postoshiftfrom != realign[i].start -2 )
+ {
+ // Report the block that was actually tested (i-2); the message used to print realign[i-1].end.
+ reporterr( "Unexpected pattern??? i=%d, realign[i-2].end=%d, realign[i].start=%d\n", i, realign[i-2].end, realign[i].start );
+ exit( 1 );
+ }
+
+ // Fold block i-2 into block i and clear block i-2.
+ realign[i].nnewres += realign[i-2].nnewres;
+ realign[i].start = realign[i-2].start+2;
+ realign[i-2].nnewres = 0;
+ realign[i-2].start = realign[i-2].end = 0;
+
+
+// reporterr( "SHIFT %d -> %d\n", postoshiftfrom, postoshiftto );
+ // Original sequences: move two columns, each into a slot that must be a gap.
+ for( j=0; j<norg; j++ )
+ {
+ if( seq[j][postoshiftto] != '-' )
+ {
+ reporterr( "Unexpected pattern 2. i=%d, postoshiftto=%d, postoshiftfrom=%d, seq[%d][%d]=%c\n", i, postoshiftto, postoshiftfrom, j, postoshiftto, seq[j][postoshiftto] );
+// reporterr( seq[j] );
+ exit( 1 );
+ }
+ seq[j][postoshiftto] = seq[j][postoshiftfrom];
+ seq[j][postoshiftfrom] = '-';
+
+ if( seq[j][postoshiftto+1] != '-' )
+ {
+ reporterr( "Unexpected pattern 3???\n" );
+ exit( 1 );
+ }
+ seq[j][postoshiftto+1] = seq[j][postoshiftfrom+1];
+ seq[j][postoshiftfrom+1] = '-';
+ }
+ // Added sequences: pull the first non-gap character of the window into each empty slot.
+ for( j=norg; j<njob; j++ )
+ {
+ if( seq[j][postoshiftto] == '-' )
+ {
+ shiftonadd = searchlet( seq[j]+postoshiftto, seq[j]+postoshiftfrom );
+ if( shiftonadd != -1 )
+ {
+ seq[j][postoshiftto] = seq[j][postoshiftto+shiftonadd];
+ seq[j][postoshiftto+shiftonadd] = '-';
+ }
+ }
+ if( seq[j][postoshiftto+1] == '-' )
+ {
+ shiftonadd = searchlet( seq[j]+postoshiftto+1, seq[j]+postoshiftfrom+1 );
+ if( shiftonadd != -1 )
+ {
+ seq[j][postoshiftto+1] = seq[j][postoshiftto+1+shiftonadd];
+ seq[j][postoshiftto+1+shiftonadd] = '-';
+ }
+ }
+ }
+ }
+ }
+// for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
+}
+/*
+ * smoothing1: merge realign blocks that sit in directly adjacent columns.
+ *
+ * Whenever realign[i-1] and realign[i] both carry new residues, block i-1 is
+ * folded into block i (counts added, start moved to realign[i-1].start+1,
+ * block i-1 zeroed).  In the first njob-nadd sequences the character at the
+ * column just after block i-1 is moved to the start of block i-1, which must
+ * be a gap.  In the added sequences a gap at that start column is filled with
+ * the first non-gap character searchlet() finds in the window.
+ * Unexpected layouts are reported and terminate the program with status 1.
+ */
+static void smoothing1( int njob, int nadd, int lenfull, char **seq, Blocktorealign *realign )
+{
+	int col, s;
+	int norg = njob-nadd;
+
+	reporterr( "Smoothing1\n" );
+	for( col=1; col<lenfull+1; col++ )
+	{
+		Blocktorealign *prev = realign + col - 1;
+		Blocktorealign *cur = realign + col;
+		int dst, src, found;
+
+		/* Only two neighbouring blocks that both hold new residues are merged. */
+		if( !prev->nnewres || !cur->nnewres ) continue;
+
+		dst = prev->start;
+		src = prev->end + 1;
+		if( src != cur->start - 1 )
+		{
+			reporterr( "Unexpected pattern??? i=%d, realign[i-1].end=%d, realign[i].start=%d\n", col, prev->end, cur->start );
+			exit( 1 );
+		}
+
+		cur->nnewres += prev->nnewres;
+		cur->start = prev->start + 1;
+		prev->nnewres = 0;
+		prev->start = prev->end = 0;
+
+		/* Original sequences: the destination column has to be a gap. */
+		for( s=0; s<norg; s++ )
+		{
+			if( seq[s][dst] != '-' )
+			{
+				reporterr( "Unexpected pattern 2???\n" );
+				exit( 1 );
+			}
+			seq[s][dst] = seq[s][src];
+			seq[s][src] = '-';
+		}
+
+		/* Added sequences: fill an empty destination from the window, if possible. */
+		for( s=norg; s<njob; s++ )
+		{
+			if( seq[s][dst] != '-' ) continue;
+			found = searchlet( seq[s]+dst, seq[s]+src );
+			if( found == -1 ) continue;
+			seq[s][dst] = seq[s][dst+found];
+			seq[s][dst+found] = '-';
+		}
+	}
+}
+
+/*
+ * main: add nadd new sequences to an existing alignment of norg = njob-nadd
+ * sequences.
+ *
+ * Outline of what the code below does:
+ *   1. parse options (arguments()), open the input (inputfile or stdin) and
+ *      read all njob sequences;
+ *   2. when constraint is set, load local-homology data from the file 'hat3';
+ *   3. obtain distances, either computed internally (tuplesize > 0) or read
+ *      from 'hat2' / 'hat2n' + 'hat2i'; optionally write them to 'hat2'
+ *      (distout);
+ *   4. build or load a tree over the norg original sequences;
+ *   5. run addsinglethread() for the new sequences, in nthread pthreads or
+ *      serially;
+ *   6. write the output order to the file 'order', merge the per-sequence gap
+ *      lists into all sequences ("Combining"), optionally smooth, and realign
+ *      every block that received more than one new residue;
+ *   7. with keeplength, restore the original gaps and write '_deletelist'
+ *      (and '_deletemap' when mapout is set);
+ *   8. write the final sequences to prep_g.
+ *
+ * Returns 0 on success; every detected error exits with status 1.
+ */
+int main( int argc, char *argv[] )
+{
+ static int *nlen;
+ static char **name, **seq;
+ static char **tmpseq;
+ static char *tmpseq1;
+// static char *check1, *check2;
+ static double **iscore, **iscore_kozo;
+ static double *eff_kozo, *eff_kozo_mapped = NULL;
+ int i, j, f, ien;
+ int iadd;
+ static int ***topol_kozo;
+ Treedep *dep;
+ static double **len_kozo;
+ FILE *prep;
+ FILE *infp;
+ FILE *hat2p;
+ int alignmentlength;
+ char c;
+ int alloclen, fullseqlen, tmplen;
+ LocalHom **localhomtable = NULL;
+ static char *kozoarivec;
+ int nkozo;
+ int njobc, norg, lenfull;
+ int **newgaplist_o;
+ int *newgaplist_compact;
+ int **follower;
+ int *follows;
+ int *istherenewgap;
+ int zure;
+ int *posmap;
+ int *ordertable;
+ FILE *orderfp;
+ int tmpseqlen;
+ Blocktorealign *realign;
+ RNApair ***singlerna;
+ int ***topol;
+ double **len;
+ double **iscoreo, **nscore;
+ FILE *fp;
+ int **deletelist = NULL;
+ char **addbk = NULL;
+ char *originalgaps = NULL;
+ Addtree *addtree = NULL; // only allocated when treeout is set; NULL otherwise so the copy into targ[] is well defined
+
+
+ arguments( argc, argv );
+#ifndef enablemultithread
+ nthread = 0;
+#endif
+
+
+ if( fastathreshold < 0.0001 ) constraint = 0;
+
+ // Input comes from the named file when given, otherwise from stdin.
+ if( inputfile )
+ {
+ infp = fopen( inputfile, "r" );
+ if( !infp )
+ {
+ fprintf( stderr, "Cannot open %s\n", inputfile );
+ exit( 1 );
+ }
+ }
+ else
+ infp = stdin;
+
+ getnumlen( infp );
+ rewind( infp );
+
+
+ nkozo = 0;
+
+ if( njob < 2 )
+ {
+ fprintf( stderr, "At least 2 sequences should be input!\n"
+ "Only %d sequence found.\n", njob );
+ exit( 1 );
+ }
+
+ norg = njob-nadd;
+ njobc = norg+1;
+ fprintf( stderr, "norg = %d\n", norg );
+ fprintf( stderr, "njobc = %d\n", njobc );
+ if( norg > 1000 || nadd > 1000 ) use_fft = 0;
+
+ fullseqlen = alloclen = nlenmax*4+1; // caution!
+ seq = AllocateCharMtx( njob, alloclen );
+
+ name = AllocateCharMtx( njob, B+1 );
+ nlen = AllocateIntVec( njob );
+
+ ndeleted = 0;
+
+
+ // With separate distance sources, iscore covers only the original sequences
+ // and nscore holds original-vs-new distances; otherwise iscore covers all njob.
+ if( multidist || tuplesize > 0 )
+ {
+ iscore = AllocateFloatHalfMtx( norg );
+ nscore = AllocateFloatMtx( norg, nadd );
+ }
+ else
+ {
+ iscore = AllocateFloatHalfMtx( njob );
+ nscore = NULL;
+ }
+
+ kozoarivec = AllocateCharVec( njob );
+
+
+ ordertable = AllocateIntVec( norg+1 );
+
+
+ if( constraint )
+ {
+#if SMALLMEMORY
+ if( multidist )
+ {
+ localhomtable = (LocalHom **)calloc( norg, sizeof( LocalHom *) );
+ for( i=0; i<norg; i++)
+ {
+ localhomtable[i] = (LocalHom *)calloc( nadd, sizeof( LocalHom ) );
+ for( j=0; j<nadd; j++)
+ {
+ localhomtable[i][j].start1 = -1;
+ localhomtable[i][j].end1 = -1;
+ localhomtable[i][j].start2 = -1;
+ localhomtable[i][j].end2 = -1;
+ localhomtable[i][j].overlapaa = -1.0;
+ localhomtable[i][j].opt = -1.0;
+ localhomtable[i][j].importance = -1.0;
+ localhomtable[i][j].next = NULL;
+ localhomtable[i][j].korh = 'h';
+ }
+ }
+// localhomtable = (LocalHom **)calloc( norg+nadd, sizeof( LocalHom *) );
+// for( i=norg; i<norg+nadd; i++) // not really needed
+// {
+// localhomtable[i] = (LocalHom *)calloc( norg, sizeof( LocalHom ) );
+// for( j=0; j<norg; j++)
+// {
+// localhomtable[i][j].start1 = -1;
+// localhomtable[i][j].end1 = -1;
+// localhomtable[i][j].start2 = -1;
+// localhomtable[i][j].end2 = -1;
+// localhomtable[i][j].overlapaa = -1.0;
+// localhomtable[i][j].opt = -1.0;
+// localhomtable[i][j].importance = -1.0;
+// localhomtable[i][j].next = NULL;
+// localhomtable[i][j].korh = 'h';
+// }
+// }
+ }
+ else
+#endif
+ {
+ localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
+ for( i=0; i<njob; i++)
+ {
+ localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
+ for( j=0; j<njob; j++)
+ {
+ localhomtable[i][j].start1 = -1;
+ localhomtable[i][j].end1 = -1;
+ localhomtable[i][j].start2 = -1;
+ localhomtable[i][j].end2 = -1;
+ localhomtable[i][j].overlapaa = -1.0;
+ localhomtable[i][j].opt = -1.0;
+ localhomtable[i][j].importance = -1.0;
+ localhomtable[i][j].next = NULL;
+ localhomtable[i][j].korh = 'h';
+ }
+ }
+ }
+
+ fprintf( stderr, "Loading 'hat3' ... " );
+ prep = fopen( "hat3", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat3." );
+#if SMALLMEMORY
+ if( multidist )
+ {
+// readlocalhomtable_two( prep, norg, nadd, localhomtable, localhomtable+norg, kozoarivec );
+ readlocalhomtable_one( prep, norg, nadd, localhomtable, kozoarivec );
+ }
+ else
+#endif
+ {
+ readlocalhomtable( prep, njob, localhomtable, kozoarivec );
+ }
+
+ fclose( prep );
+ fprintf( stderr, "\ndone.\n" );
+
+
+ // Count the sequences flagged in kozoarivec.
+ nkozo = 0;
+ for( i=0; i<njob; i++ )
+ {
+// fprintf( stderr, "kozoarivec[%d] = %d\n", i, kozoarivec[i] );
+ if( kozoarivec[i] ) nkozo++;
+ }
+ if( nkozo )
+ {
+ topol_kozo = AllocateIntCub( nkozo, 2, 0 );
+ len_kozo = AllocateFloatMtx( nkozo, 2 );
+ iscore_kozo = AllocateFloatHalfMtx( nkozo );
+ eff_kozo = AllocateDoubleVec( nkozo );
+ eff_kozo_mapped = AllocateDoubleVec( njob );
+ }
+
+
+#if SMALLMEMORY
+// outlocalhom_part( localhomtable, norg, nadd );
+#else
+// outlocalhom( localhomtable, njob );
+#endif
+
+#if 0
+ fprintf( stderr, "Extending localhom ... " );
+ extendlocalhom2( njob, localhomtable );
+ fprintf( stderr, "done.\n" );
+#endif
+ }
+
+#if 0
+ readData( infp, name, nlen, seq );
+#else
+ readData_pointer( infp, name, nlen, seq );
+ fclose( infp );
+#endif
+
+ constants( njob, seq );
+
+#if 0
+ fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
+#endif
+
+ initSignalSM();
+
+ initFiles();
+
+// WriteOptions( trap_g );
+
+ c = seqcheck( seq );
+ if( c )
+ {
+ fprintf( stderr, "Illegal character %c\n", c );
+ exit( 1 );
+ }
+
+ // Unless hitout is in use, the original sequences must all have the same length.
+ alignmentlength = strlen( seq[0] );
+ for( i=0; i<norg; i++ )
+ {
+ if( hitout == 0.0 && alignmentlength != strlen( seq[i] ) )
+ {
+ fprintf( stderr, "#################################################################################\n" );
+ fprintf( stderr, "# ERROR! #\n" );
+ fprintf( stderr, "# The original%4d sequences must be aligned #\n", njob-nadd );
+ fprintf( stderr, "#################################################################################\n" );
+ exit( 1 );
+ }
+ }
+ if( addprofile )
+ {
+ fprintf( stderr, "Not supported!\n" );
+ exit( 1 );
+ }
+
+ if( tuplesize > 0 ) // if mtx is internally computed
+ {
+ if( multidist == 1 )
+ {
+ ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore ); // iscore is wasted here.
+
+// hat2p = fopen( "hat2-1", "w" );
+// WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore );
+// fclose( hat2p );
+
+ dndpre( norg, seq, iscore );
+// fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " );
+// prep = fopen( "hat2i", "r" );
+// if( prep == NULL ) ErrorExit( "Make hat2i." );
+// readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore );
+// fclose( prep );
+// fprintf( stderr, "done.\n" );
+
+// hat2p = fopen( "hat2-2", "w" );
+// WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore );
+// fclose( hat2p );
+ }
+ else
+ {
+ ktupledistancematrix( njob, norg, nlenmax, seq, name, iscore, nscore );
+ }
+ }
+ else
+ {
+ if( multidist == 1 )
+ {
+ fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... " );
+ prep = fopen( "hat2n", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat2n." );
+ readhat2_doublehalf_part_pointer( prep, njob, nadd, name, nscore );
+ fclose( prep );
+ fprintf( stderr, "done.\n" );
+
+ fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " );
+ prep = fopen( "hat2i", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat2i." );
+ readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore );
+ fclose( prep );
+ fprintf( stderr, "done.\n" );
+ }
+ else
+ {
+ fprintf( stderr, "Loading 'hat2' ... " );
+ prep = fopen( "hat2", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat2." );
+ readhat2_doublehalf_pointer( prep, njob, name, iscore );
+ fclose( prep );
+ fprintf( stderr, "done.\n" );
+ }
+ }
+
+#if 1
+ if( distout )
+ {
+ fprintf( stderr, "Writing distances between new sequences and existing msa.\n" );
+ hat2p = fopen( "hat2", "w" );
+ if( !hat2p ) // previously unchecked: fprintf on a NULL stream would crash
+ {
+ fprintf( stderr, "Cannot open 'hat2'\n" );
+ exit( 1 );
+ }
+ if( multidist || tuplesize > 0 )
+ {
+ for( iadd=0; iadd<nadd; iadd++ )
+ {
+ fprintf( hat2p, "Distance between new sequence %d and %d sequences in existing msa\n", iadd+1, norg );
+ for( i=0; i<norg; i++ )
+ {
+ fprintf( hat2p, "%5.3f ", nscore[i][iadd] );
+ if( (i+1) % 12 == 0 ) fprintf( hat2p, "\n" );
+ }
+ fprintf( hat2p, "\n\n" );
+ }
+ }
+ else
+ {
+ for( iadd=0; iadd<nadd; iadd++ )
+ {
+ fprintf( hat2p, "Distance between new sequence %d and %d sequences in existing msa\n", iadd+1, norg );
+ for( i=0; i<norg; i++ )
+ {
+ fprintf( hat2p, "%5.3f ", iscore[i][norg+iadd-i] );
+ if( (i+1) % 12 == 0 ) fprintf( hat2p, "\n" );
+ }
+ fprintf( hat2p, "\n\n" );
+ }
+ }
+ fclose( hat2p );
+// exit( 1 );
+// hat2p = fopen( "hat2", "w" );
+// WriteFloatHat2_pointer_halfmtx( hat2p, norg, name, iscore );
+// fclose( hat2p );
+// exit( 1 );
+ }
+#endif
+
+
+ singlerna = NULL;
+
+
+ if( keeplength )
+ {
+ lenfull = strlen( seq[0] );
+ originalgaps = (char *)calloc( lenfull+1, sizeof( char) );
+ recordoriginalgaps( originalgaps, norg, seq );
+
+
+ // One -1-terminated list per new sequence, plus a NULL terminator entry.
+ deletelist = (int **)calloc( nadd+1, sizeof( int * ) );
+ for( i=0; i<nadd; i++ )
+ {
+ deletelist[i] = calloc( 1, sizeof( int ) );
+ deletelist[i][0] = -1;
+ }
+ deletelist[nadd] = NULL;
+
+ }
+ else
+ {
+ originalgaps = NULL;
+ deletelist = NULL;
+ }
+
+ commongappick( norg, seq );
+ lenfull = strlen( seq[0] );
+
+ if( keeplength && mapout )
+ {
+ addbk = (char **)calloc( nadd+1, sizeof( char * ) );
+ for( i=0; i<nadd; i++ )
+ {
+ ien = strlen( seq[norg+i] );
+ addbk[i] = (char *)calloc( ien + 1, sizeof( char ) );
+ gappick0( addbk[i], seq[norg+i] );
+ }
+ addbk[nadd] = NULL;
+ }
+ else
+ {
+ addbk = NULL;
+ }
+
+
+
+// newgaplist_o = AllocateIntMtx( nadd, alloclen ); // too large
+ newgaplist_o = AllocateIntMtx( nadd, lenfull*2 );
+ newgaplist_compact = AllocateIntVec( lenfull*2 );
+ istherenewgap = AllocateIntVec( nadd );
+ follower = AllocateIntMtx( norg, 1 );
+ for( i=0; i<norg; i++ ) follower[i][0] = -1;
+ follows = AllocateIntVec( nadd );
+
+ dep = (Treedep *)calloc( norg, sizeof( Treedep ) );
+ topol = AllocateIntCub( norg, 2, 0 );
+ len = AllocateFloatMtx( norg, 2 );
+// iscoreo = AllocateFloatHalfMtx( norg );
+ mtxcpy( norg, norg, &iscoreo, iscore );
+
+ if( treeout )
+ {
+ addtree = (Addtree *)calloc( nadd, sizeof( Addtree ) );
+ if( !addtree )
+ {
+ fprintf( stderr, "Cannot allocate addtree\n" );
+ exit( 1 );
+ }
+ }
+
+
+// nlim = norg-1;
+// for( i=0; i<nlim; i++ )
+// {
+// fptc = iscoreo[i]+1;
+// fpt = iscore[i]+1;
+// j = norg-i-1;
+// while( j-- )
+// *fptc++ = *fpt++;
+//// for( j=i+1; j<norg; j++ )
+//// iscoreo[i][j-i] = iscore[i][j-i];
+// }
+
+// fprintf( stderr, "building a tree.." );
+ if( treein )
+ {
+ reporterr( "Loading a tree ... " );
+ loadtop( norg, iscoreo, topol, len, name, NULL, dep ); // nogaplen?
+ reporterr( "\ndone.\n\n" );
+ }
+ else if( treeout )
+ fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( norg, iscoreo, topol, len, name, nlen, dep, 1 );
+ else
+ fixed_musclesupg_double_realloc_nobk_halfmtx( norg, iscoreo, topol, len, dep, 0, 1 );
+// fprintf( stderr, "done.\n" );
+
+ if( norg > 1 )
+ cnctintvec( ordertable, topol[norg-2][0], topol[norg-2][1] );
+ else
+ {
+ ordertable[0] = 0; ordertable[1] = -1;
+ }
+ FreeFloatHalfMtx( iscoreo, norg );
+
+#ifdef enablemultithread
+ if( nthread )
+ {
+ pthread_t *handle;
+ pthread_mutex_t mutex_counter;
+ thread_arg_t *targ;
+ int *iaddsharept;
+
+ targ = calloc( nthread, sizeof( thread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex_counter, NULL );
+ iaddsharept = calloc( 1, sizeof(int) );
+ *iaddsharept = 0;
+
+ for( i=0; i<nthread; i++ )
+ {
+ targ[i].thread_no = i;
+ targ[i].follows = follows;
+ targ[i].njob = njob;
+ targ[i].nadd = nadd;
+ targ[i].nlen = nlen;
+ targ[i].name = name;
+ targ[i].seq = seq;
+ targ[i].localhomtable = localhomtable;
+ targ[i].iscore = iscore;
+ targ[i].nscore = nscore;
+ targ[i].istherenewgap = istherenewgap;
+ targ[i].newgaplist = newgaplist_o;
+ targ[i].singlerna = singlerna;
+ targ[i].eff_kozo_mapped = eff_kozo_mapped;
+ targ[i].alloclen = alloclen;
+ targ[i].iaddshare = iaddsharept;
+ targ[i].dep = dep;
+ targ[i].topol = topol;
+ targ[i].len = len;
+ targ[i].addtree = addtree;
+ targ[i].deletelist = deletelist;
+ targ[i].mutex_counter = &mutex_counter;
+ pthread_create( handle+i, NULL, addsinglethread, (void *)(targ+i) );
+ }
+ for( i=0; i<nthread; i++ )
+ {
+ pthread_join( handle[i], NULL );
+ }
+ pthread_mutex_destroy( &mutex_counter );
+ free( handle );
+ free( targ );
+ free( iaddsharept );
+ }
+ else
+#endif
+ {
+ thread_arg_t *targ;
+ targ = calloc( 1, sizeof( thread_arg_t ) );
+ targ[0].follows = follows;
+ targ[0].njob = njob;
+ targ[0].nadd = nadd;
+ targ[0].nlen = nlen;
+ targ[0].name = name;
+ targ[0].seq = seq;
+ targ[0].localhomtable = localhomtable;
+ targ[0].iscore = iscore;
+ targ[0].nscore = nscore;
+ targ[0].istherenewgap = istherenewgap;
+ targ[0].newgaplist = newgaplist_o;
+ targ[0].singlerna = singlerna;
+ targ[0].eff_kozo_mapped = eff_kozo_mapped;
+ targ[0].alloclen = alloclen;
+ targ[0].dep = dep;
+ targ[0].topol = topol;
+ targ[0].len = len;
+ targ[0].addtree = addtree;
+ targ[0].deletelist = deletelist;
+ addsinglethread( targ );
+ free( targ );
+ }
+ free( dep );
+ FreeFloatMtx( len );
+ if( multidist || tuplesize > 0 ) FreeFloatMtx( nscore );
+
+
+// for( i=0; i<nadd; i++ ) fprintf( stdout, ">%s (%d) \n%s\n", name[norg+i], norg+i, seq[norg+i] );
+
+ if( treeout )
+ {
+ fp = fopen( "infile.tree", "a" );
+ if( fp == 0 )
+ {
+ fprintf( stderr, "File error!\n" );
+ exit( 1 );
+ }
+ for( i=0; i<nadd; i++ )
+ {
+ fprintf( fp, "\n" );
+ fprintf( fp, "%8d: %s\n", norg+i+1, name[norg+i]+1 );
+ fprintf( fp, " nearest sequence: %d\n", addtree[i].nearest + 1 );
+ fprintf( fp, " approximate distance: %f\n", addtree[i].dist1 );
+ fprintf( fp, " sister group: %s\n", addtree[i].neighbors );
+ fprintf( fp, " approximate distance: %f\n", addtree[i].dist2 );
+ free( addtree[i].neighbors );
+ }
+ fclose( fp );
+ free( addtree );
+ }
+
+ // Append each new sequence to the -1-terminated follower list of the
+ // original sequence recorded for it in follows[].
+ for( iadd=0; iadd<nadd; iadd++ )
+ {
+ f = follows[iadd];
+ for( i=0; follower[f][i]!=-1; i++ )
+ ;
+ if( !(follower[f] = realloc( follower[f], (i+2)*sizeof(int) ) ) )
+ {
+ fprintf( stderr, "Cannot reallocate follower[]" );
+ exit( 1 );
+ }
+ follower[f][i] = iadd;
+ follower[f][i+1] = -1;
+#if 0
+ fprintf( stderr, "\nfollowers of %d = ", f );
+ for( i=0; follower[f][i]!=-1; i++ )
+ fprintf( stderr, "%d ", follower[f][i] );
+ fprintf( stderr, "\n" );
+#endif
+ }
+
+ // Write the output order: each original sequence followed by its followers.
+ orderfp = fopen( "order", "w" );
+ if( !orderfp )
+ {
+ fprintf( stderr, "Cannot open 'order'\n" );
+ exit( 1 );
+ }
+ for( i=0; ordertable[i]!=-1; i++ )
+ {
+ fprintf( orderfp, "%d\n", ordertable[i] );
+// for( j=0; follower[i][j]!=-1; j++ )
+// fprintf( orderfp, "%d\n", follower[i][j]+norg );
+ for( j=0; follower[ordertable[i]][j]!=-1; j++ )
+ fprintf( orderfp, "%d\n", follower[ordertable[i]][j]+norg );
+// fprintf( orderfp, "%d -> %d\n", follower[i][j]+norg, i );
+ }
+ fclose( orderfp );
+
+ posmap = AllocateIntVec( lenfull+2 );
+ realign = calloc( lenfull+2, sizeof( Blocktorealign ) );
+ for( i=0; i<lenfull+1; i++ ) posmap[i] = i;
+ for( i=0; i<lenfull+1; i++ )
+ {
+ realign[i].nnewres = 0;
+ realign[i].start = 0;
+ realign[i].end = 0;
+ }
+
+ fprintf( stderr, "\n\nCombining ..\n" );
+ fflush( stderr );
+ tmpseqlen = alloclen * 100;
+ tmpseq = AllocateCharMtx( 1, tmpseqlen );
+
+
+// check1 = AllocateCharVec( tmpseqlen );
+// check2 = AllocateCharVec( tmpseqlen );
+// gappick0( check2, seq[0] );
+ for( iadd=0; iadd<nadd; iadd++ )
+ {
+// fprintf( stderr, "%d / %d\r", iadd, nadd );
+ fflush( stderr );
+
+// fprintf( stderr, "\niadd == %d\n", iadd );
+ makegaplistcompact( lenfull, posmap, newgaplist_compact, newgaplist_o[iadd] );
+ if( iadd == 0 || istherenewgap[iadd] )
+ {
+ tmpseq1 = tmpseq[0];
+// gaplist2alnx( lenfull, tmpseq1, seq[0], newgaplist_o[iadd], posmap, tmpseqlen );
+ gaplist2alnx( lenfull, tmpseq1, seq[0], newgaplist_compact, posmap, tmpseqlen );
+// fprintf( stderr, "len = %d ? %d\n", strlen( tmpseq1 ), alloclen );
+ // Grow every sequence buffer when the expanded row no longer fits.
+ if( ( tmplen = strlen( tmpseq1 ) ) >= fullseqlen )
+ {
+ fullseqlen = tmplen * 2+1;
+// fprintf( stderr, "Length over!\n" );
+// fprintf( stderr, "strlen(tmpseq1)=%d\n", (int)strlen( tmpseq1 ) );
+ fprintf( stderr, "reallocating..." );
+// fprintf( stderr, "alloclen=%d\n", alloclen );
+// fprintf( stderr, "Please recompile!\n" );
+// exit( 1 );
+ for( i=0; i<njob; i++ )
+ {
+ seq[i] = realloc( seq[i], fullseqlen * sizeof( char ) );
+ if( !seq[i] )
+ {
+ fprintf( stderr, "Cannot reallocate seq[][]\n" );
+ exit( 1 );
+ }
+ }
+ fprintf( stderr, "done.\n" );
+ }
+ strcpy( seq[0], tmpseq1 );
+
+ ien = norg+iadd;
+#ifdef enablemultithread
+ if( nthread > 0 && ien > 500 )
+ {
+ gaplist2alnxthread_arg_t *targ;
+ int jobpos;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+ fprintf( stderr, "%d / %d (threads %d-%d)\r", iadd, nadd, 0, nthread );
+
+ targ = calloc( nthread, sizeof( gaplist2alnxthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+ jobpos = 1;
+ for( i=0; i<nthread; i++ )
+ {
+// targ[i].thread_no = i;
+ targ[i].ncycle = ien;
+ targ[i].jobpospt = &jobpos;
+ targ[i].tmpseqlen = tmpseqlen;
+ targ[i].lenfull = lenfull;
+ targ[i].seq = seq;
+// targ[i].newgaplist = newgaplist_o[iadd];
+ targ[i].newgaplist = newgaplist_compact;
+ targ[i].posmap = posmap;
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, gaplist2alnxthread, (void *)(targ+i) );
+ }
+ for( i=0; i<nthread; i++ )
+ {
+ pthread_join( handle[i], NULL );
+ }
+ pthread_mutex_destroy( &mutex );
+ free( handle );
+ free( targ );
+ }
+ else
+#endif
+ {
+ fprintf( stderr, "%d / %d\r", iadd, nadd );
+ for( i=1; i<ien; i++ )
+ {
+ tmpseq1 = tmpseq[0];
+ if( i == 1 ) fprintf( stderr, " %d / %d\r", iadd, nadd );
+// gaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist_o[iadd], posmap, tmpseqlen );
+ gaplist2alnx( lenfull, tmpseq1, seq[i], newgaplist_compact, posmap, tmpseqlen );
+// fprintf( stderr, ">%s (iadd=%d)\n%s\n", name[i], iadd, tmpseq1 );
+ strcpy( seq[i], tmpseq1 );
+ }
+ }
+ }
+ tmpseq1 = tmpseq[0];
+// insertgapsbyotherfragments_simple( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap );
+ insertgapsbyotherfragments_compact( lenfull, tmpseq1, seq[norg+iadd], newgaplist_o[iadd], posmap );
+// fprintf( stderr, "%d = %s\n", iadd, tmpseq1 );
+ eq2dash( tmpseq1 );
+ strcpy( seq[norg+iadd], tmpseq1 );
+
+// adjustposmap( lenfull, posmap, newgaplist_o[iadd] );
+ adjustposmap( lenfull, posmap, newgaplist_compact );
+ countnewres( lenfull, realign, posmap, newgaplist_o[iadd] ); // wasteful?
+// countnewres( lenfull, realign, posmap, newgaplist_compact ); // wasteful?
+
+ }
+ fprintf( stderr, "\r done. \n\n" );
+
+#if 0
+ for( i=0; i<njob; i++ )
+ {
+ fprintf( stderr, ">%s\n", name[i] );
+ fprintf( stderr, "%s\n", seq[i] );
+ }
+#endif
+
+#if 0
+ fprintf( stderr, "realign[].nnewres = " );
+ for( i=0; i<lenfull+1; i++ )
+ {
+ fprintf( stderr, "%d ", realign[i].nnewres );
+ }
+ fprintf( stderr, "\n" );
+ for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
+#endif
+
+
+ if( smoothing )
+ {
+// for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
+ smoothing1( njob, nadd, lenfull, seq, realign );
+// for( i=0; i<lenfull+1; i++ ) fprintf( stderr, "i=%d, nnewres=%d, start=%d, end=%d\n", i, realign[i].nnewres, realign[i].start, realign[i].end );
+ smoothing2( njob, nadd, lenfull, seq, realign );
+ }
+
+ // Realign every block holding more than one new residue, then shift the
+ // start/end of all later blocks by the returned offset (zure).
+ for( i=0; i<lenfull+1; i++ )
+ {
+ if( realign[i].nnewres > 1 )
+ {
+// fprintf( stderr, "i=%d: %d-%d\n", i, realign[i].start, realign[i].end );
+ fprintf( stderr, "\rRealigning %d/%d \r", i, lenfull );
+// zure = dorealignment_compact( realign+i, seq, &fullseqlen, norg );
+// zure = dorealignment_order( realign+i, seq, &fullseqlen, norg, ordertable, follows );
+ zure = dorealignment_tree( realign+i, seq, &fullseqlen, norg, topol, follows );
+#if 0
+ gappick0( check1, seq[0] );
+ fprintf( stderr, "check1 = %s\n", check1 );
+ if( strcmp( check1, check2 ) )
+ {
+ fprintf( stderr, "CHANGED!!!!!\n" );
+ exit( 1 );
+ }
+#endif
+ for( j=i+1; j<lenfull+1; j++ )
+ {
+ if( realign[j].nnewres )
+ {
+ realign[j].start -= zure;
+ realign[j].end -= zure;
+ }
+ }
+ }
+ }
+ FreeIntCub( topol );
+ fprintf( stderr, "\r done. \n\n" );
+ fflush( stderr );
+
+
+ if( keeplength )
+ {
+ FILE *dlf;
+ restoreoriginalgaps( njob, seq, originalgaps );
+
+ dlf = fopen( "_deletelist", "w" );
+ if( !dlf ) // previously unchecked
+ {
+ fprintf( stderr, "Cannot open '_deletelist'\n" );
+ exit( 1 );
+ }
+ for( i=0; i<nadd; i++ )
+ {
+ if( deletelist[i] )
+ for( j=0; deletelist[i][j]!=-1; j++ )
+ fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
+ }
+ fclose( dlf );
+
+ if( mapout )
+ {
+ dlf = fopen( "_deletemap", "w" );
+ if( !dlf ) // previously unchecked
+ {
+ fprintf( stderr, "Cannot open '_deletemap'\n" );
+ exit( 1 );
+ }
+ reconstructdeletemap( nadd, addbk, deletelist, seq+njob-nadd, dlf, name+njob-nadd );
+ fclose( dlf );
+ }
+ }
+
+
+
+ FreeIntMtx( newgaplist_o );
+ FreeIntVec( newgaplist_compact );
+ FreeIntVec( posmap );
+ free( realign );
+ free( istherenewgap );
+ FreeIntMtx( follower );
+ free( follows );
+ free( ordertable );
+ free( kozoarivec );
+ FreeCharMtx( tmpseq );
+ freeconstants();
+ if( addbk ) FreeCharMtx( addbk ); addbk = NULL;
+ if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
+ if( originalgaps ) free( originalgaps ); originalgaps = NULL;
+
+ writeData_pointer( prep_g, njob, name, nlen, seq );
+ free( nlen ); // released only after its last use; it used to be freed before the call above
+#if 0
+ writeData( stdout, njob, name, nlen, bseq );
+ writePre( njob, name, nlen, bseq, !contin );
+ writeData_pointer( prep_g, njob, name, nlen, aseq );
+#endif
+#if IODEBUG
+ fprintf( stderr, "OSHIMAI\n" );
+#endif
+
+#if SMALLMEMORY
+ if( multidist )
+ {
+// if( constraint ) FreeLocalHomTable_two( localhomtable, norg, nadd );
+ if( constraint ) FreeLocalHomTable_one( localhomtable, norg, nadd );
+ }
+ else
+#endif
+ {
+ if( constraint ) FreeLocalHomTable( localhomtable, njob );
+ }
+
+ SHOWVERSION;
+ if( ndeleted > 0 )
+ {
+ reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted );
+ if( mapout )
+ reporterr( "The deleted letters are shown in the (filename).map file.\n" );
+ else
+ reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" );
+ }
+ return( 0 );
+}
#define DEFAULTGEP_B -00
#define DEFAULTOFS_B -123 /* +10 -- -50 teido ka ? */
-void BLOSUMmtx( int n, double **matrix, double *freq, char *amino, char *amino_grp )
+
+void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char *amino_grp )
{
/*
char locaminod[26] = "GASTPLIMVDNEQFYWKRHCXXX.-U";
-4, -4, -4, -6, -5, -3, -5, -6, 3, -3, -2, -4, -3, 4, -6, -3, -3, 3, 11,
-1, -4, -5, -6, -2, -4, -4, -6, -5, 4, 1, -4, 1, -2, -4, -3, 0, -5, -3, 7,
};
+ double tmpmtx90[] =
+ {
+ 5,
+-2, 6,
+-2, -1, 7,
+-3, -3, 1, 7,
+-1, -5, -4, -5, 9,
+-1, 1, 0, -1, -4, 7,
+-1, -1, -1, 1, -6, 2, 6,
+ 0, -3, -1, -2, -4, -3, -3, 6,
+-2, 0, 0, -2, -5, 1, -1, -3, 8,
+-2, -4, -4, -5, -2, -4, -4, -5, -4, 5,
+-2, -3, -4, -5, -2, -3, -4, -5, -4, 1, 5,
+-1, 2, 0, -1, -4, 1, 0, -2, -1, -4, -3, 6,
+-2, -2, -3, -4, -2, 0, -3, -4, -3, 1, 2, -2, 7,
+-3, -4, -4, -5, -3, -4, -5, -5, -2, -1, 0, -4, -1, 7,
+-1, -3, -3, -3, -4, -2, -2, -3, -3, -4, -4, -2, -3, -4, 8,
+ 1, -1, 0, -1, -2, -1, -1, -1, -2, -3, -3, -1, -2, -3, -2, 5,
+ 0, -2, 0, -2, -2, -1, -1, -3, -2, -1, -2, -1, -1, -3, -2, 1, 6,
+-4, -4, -5, -6, -4, -3, -5, -4, -3, -4, -3, -5, -2, 0, -5, -4, -4, 11,
+-3, -3, -3, -4, -4, -3, -4, -5, 1, -2, -2, -3, -2, 3, -4, -3, -2, 2, 8,
+-1, -3, -4, -5, -2, -3, -3, -5, -4, 3, 0, -3, 0, -2, -3, -2, -1, -3, -3, 5,
+ };
+ double tmpmtx100[] =
+ {
+ 8,
+-3,10,
+-4,-2,11,
+-5,-5, 1,10,
+-2,-8,-5,-8,14,
+-2, 0,-1,-2,-7,11,
+-3,-2,-2, 2,-9, 2,10,
+-1,-6,-2,-4,-7,-5,-6, 9,
+-4,-1, 0,-3,-8, 1,-2,-6,13,
+-4,-7,-7,-8,-3,-6,-7,-9,-7, 8,
+-4,-6,-7,-8,-5,-5,-7,-8,-6, 2, 8,
+-2, 3,-1,-3,-8, 2, 0,-5,-3,-6,-6,10,
+-3,-4,-5,-8,-4,-2,-5,-7,-5, 1, 3,-4,12,
+-5,-6,-7,-8,-4,-6,-8,-8,-4,-2, 0,-6,-1,11,
+-2,-5,-5,-5,-8,-4,-4,-6,-5,-7,-7,-3,-5,-7,12,
+ 1,-3, 0,-2,-3,-2,-2,-2,-3,-5,-6,-2,-4,-5,-3, 9,
+-1,-3,-1,-4,-3,-3,-3,-5,-4,-3,-4,-3,-2,-5,-4, 2, 9,
+-6,-7,-8,-10,-7,-5,-8,-7,-5,-6,-5,-8,-4, 0,-8,-7,-7,17,
+-5,-5,-5,-7,-6,-4,-7,-8, 1,-4,-4,-5,-5, 4,-7,-5,-5, 2,12,
+-2,-6,-7,-8,-3,-5,-5,-8,-7, 4, 0,-5, 0,-3,-6,-4,-1,-5,-5, 8,
+ };
double tmpmtx0[] =
{
2.4,
else if( n == 50 ) tmpmtx = tmpmtx50;
else if( n == 62 ) tmpmtx = tmpmtx62;
else if( n == 80 ) tmpmtx = tmpmtx80;
+ else if( n == 90 ) tmpmtx = tmpmtx90;
+ else if( n == 100 ) tmpmtx = tmpmtx100;
else if( n == 0 ) tmpmtx = tmpmtx0;
else if( n == -1 ) tmpmtx = loadaamtx();
else
for( i=0; i<26; i++ ) amino[i] = locaminod[i];
for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i];
}
+
+/*
+ * extendedmtx: set up a match/mismatch scoring scheme over nalphabets symbols.
+ *
+ *   amino[i]              = i (as unsigned char)
+ *   amino_grp[amino[i]]   = i % 6
+ *   freq[i]               = 1.0/nalphabets
+ *   matrix[i][j]          = 1.0 when i == j, -1.0 otherwise
+ */
+void extendedmtx( double **matrix, double *freq, unsigned char *amino, char *amino_grp )
+{
+	int row;
+	int col;
+
+	/* Symbol table, group table and uniform frequencies. */
+	for( row=0; row<nalphabets; row++ )
+	{
+//		fprintf( stderr, "i=%d, i=%c\n", row, row );
+		amino[row] = (unsigned char)row;
+		amino_grp[(int)amino[row]] = row % 6;
+		freq[row] = 1.0/nalphabets;
+	}
+
+	/* Symmetric score matrix: -1 off the diagonal, +1 on it. */
+	for( row=0; row<nalphabets; row++ )
+	{
+		for( col=0; col<row; col++ )
+		{
+			matrix[row][col] = (double)-1.0;
+			matrix[col][row] = (double)-1.0;
+		}
+		matrix[row][row] = (double)1.0;
+	}
+#if 0 // to be revived when user-defined matrices are supported.
+	if( tmpmtx[400] != -1.0 )
+	{
+		for( i=0; i<20; i++ ) freq[i] = tmpmtx[400+i];
+		av = 0.0;
+		for( i=0; i<20; i++ ) av += freq[i];
+		for( i=0; i<20; i++ ) freq[i] /= av;
+	}
+	else
+		for( i=0; i<20; i++ ) freq[i] = freqd[i];
+#endif
+#if 0
+	for( i=0; i<nalphabets; i++ )
+	{
+		fprintf( stderr, "%d: %c, %d, %f\n", i, amino[i], amino_grp[amino[i]], freq[i] );
+	}
+#endif
+}
--- /dev/null
+# Build with the given mingw-w64 cross compiler (static, -O3, -Dmingw) and
+# move everything produced in ../binaries/ into a freshly created directory.
+#   $1 = output directory, $2 = compiler passed to make as CC
+build_cygwin()
+{
+	outdir=$1
+	compiler=$2
+
+	rm -rf "$outdir"
+	mkdir "$outdir"
+	make clean
+	rm -f ../binaries/*
+	make CC="$compiler" CFLAGS="-static -O3 -Dmingw"
+	mv ../binaries/* "$outdir"
+}
+
+build_cygwin binaries64-cygwin/ x86_64-w64-mingw32-gcc
+
+build_cygwin binaries32-cygwin/ i686-w64-mingw32-gcc
--- /dev/null
+#!/usr/bin/env bash
+
+# Build once with the toolchain under /home/mingw<bits>/mingw<bits> and move
+# the contents of ../binaries/ into a freshly created output directory.
+#   $1 = 32 or 64 (selects the toolchain directory)
+#   $2 = output directory name
+#   $3 = extra VAR=value assignment handed to make
+# PATH is prepended on every call, exactly as the unrolled stanzas did.
+build_mingw()
+{
+	local bits=$1
+	local outdir=$2
+	local makevar=$3
+	local root=/home/mingw${bits}/mingw${bits}
+
+	export PATH=${root}/bin:$PATH
+	export C_INCLUDE_PATH=${root}/include
+	export LIBRARY_PATH=${root}/lib
+	make clean
+	make "$makevar" CFLAGS="-Dmingw -O3"
+	rm -rf "$outdir"
+	mkdir "$outdir"
+	mv ../binaries/* "$outdir"/
+}
+
+# Builds with ENABLE_MULTITHREAD set to the empty string.
+build_mingw 32 binaries32 'ENABLE_MULTITHREAD='
+build_mingw 64 binaries64 'ENABLE_MULTITHREAD='
+
+# Builds linked statically against libm and libpthread.
+build_mingw 32 binaries32-multithread 'LIBS=-static -lm -lpthread'
+build_mingw 64 binaries64-multithread 'LIBS=-static -lm -lpthread'
#define NORMALIZE1 1
+
static int shishagonyuu( double in )
{
int out;
return( out );
}
+/*
+ * nscore: fill in the scores involving the wildcard character 'n'.
+ *
+ *   amino_n  table mapping a character code to its index in n_dis
+ *   n_dis    integer score matrix, modified in place; indices 0..25 and
+ *            amino_n['n'] are written
+ *
+ * For every index k in 0..25 the score of k against 'n' (in both
+ * orientations) is set to a quarter of k's own diagonal score, rounded by
+ * shishagonyuu().  The n-versus-n score is then set to a quarter of the sum
+ * of the diagonal scores at indices 0..3.
+ */
+static void nscore( int *amino_n, int **n_dis )
+{
+	const int npos = amino_n['n'];
+	int k, quarter;
+
+	for( k=0; k<26; k++ )
+	{
+		quarter = shishagonyuu( (double)0.25 * n_dis[k][k] );
+		n_dis[k][npos] = quarter;
+		n_dis[npos][k] = quarter;
+	}
+
+	/* Before 2017/Jan/2 this used an extra factor of 0.25 (0.25 * 0.25 * sum). */
+	n_dis[npos][npos] = shishagonyuu( (double)0.25 * ( n_dis[0][0] + n_dis[1][1] + n_dis[2][2] + n_dis[3][3] ) ); // 2017/Jan/2
+
+#if 0 // To be checked later
+	for( k=0; k<26; k++ )
+	{
+		n_dis[k][amino_n['-']] = shishagonyuu( (double)0.25 * n_dis[k][k] );
+		n_dis[amino_n['-']][k] = n_dis[k][amino_n['-']];
+	}
+//	n_dis[amino_n['-']][amino_n['-']] = shishagonyuu( (double)0.25 * 0.25 * ( n_dis[0][0] + n_dis[1][1] + n_dis[2][2] + n_dis[3][3] ) ); // No good!
+#endif
+}
+
+
+/*
+ * ambiguousscore: derive scores for the IUPAC nucleotide ambiguity codes
+ * from the scores of the bases each code stands for.
+ *
+ *   amino_n  table mapping a character code to its index in n_dis
+ *   n_dis    integer score matrix, modified in place; rows/columns 0..25
+ *            are read and written
+ *
+ * For each index i in 0..25 and each ambiguity code, the score of i against
+ * the code is the mean of the scores of i against the code's member bases,
+ * rounded by shishagonyuu(); the value is then mirrored so the matrix stays
+ * symmetric.  Afterwards each code's self score is set to the rounded mean
+ * of its member bases' self scores.
+ */
+static void ambiguousscore( int *amino_n, int **n_dis )
+{
+	/* ambiguity code -> the bases it represents, in processing order */
+	static const struct { char code; const char *bases; } iupac[] =
+	{
+		{ 'r', "ag"  },
+		{ 'y', "ct"  },
+		{ 'k', "gt"  },
+		{ 'm', "ac"  },
+		{ 's', "gc"  },
+		{ 'w', "at"  },
+		{ 'b', "cgt" },
+		{ 'd', "agt" },
+		{ 'h', "act" },
+		{ 'v', "acg" },
+	};
+	const int ncodes = (int)( sizeof( iupac ) / sizeof( iupac[0] ) );
+	const char *b;
+	int i, k, c, x, sum, nbases;
+
+	for( i=0; i<26; i++ )
+	{
+		/* first every column i-vs-code ... */
+		for( k=0; k<ncodes; k++ )
+		{
+			sum = 0;
+			nbases = 0;
+			for( b=iupac[k].bases; *b; b++ )
+			{
+				sum += n_dis[amino_n[(int)*b]][i];
+				nbases++;
+			}
+			n_dis[i][amino_n[(int)iupac[k].code]] = shishagonyuu( (double)1/nbases * sum );
+		}
+
+		/* ... then the mirrored entries code-vs-i */
+		for( k=0; k<ncodes; k++ )
+		{
+			c = amino_n[(int)iupac[k].code];
+			n_dis[c][i] = n_dis[i][c];
+		}
+	}
+
+	/* self scores of the ambiguity codes: mean of the members' self scores */
+	for( k=0; k<ncodes; k++ )
+	{
+		sum = 0;
+		nbases = 0;
+		for( b=iupac[k].bases; *b; b++ )
+		{
+			x = amino_n[(int)*b];
+			sum += n_dis[x][x];
+			nbases++;
+		}
+		i = amino_n[(int)iupac[k].code];
+		n_dis[i][i] = shishagonyuu( (double)1/nbases * sum );
+	}
+}
+
+
static void calcfreq_nuc( int nseq, char **seq, double *datafreq )
{
int i, j, l;
}
}
}
- for( i=0; i<4; i++ )
- if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
+ total = 0.0; for( i=0; i<4; i++ ) total += datafreq[i];
+ for( i=0; i<4; i++ ) datafreq[i] /= (double)total;
+ for( i=0; i<4; i++ ) if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
total = 0.0; for( i=0; i<4; i++ ) total += datafreq[i];
-// fprintf( stderr, "total = %f\n", total );
+// reporterr( "total = %f\n", total );
for( i=0; i<4; i++ ) datafreq[i] /= (double)total;
#if 0
- fprintf( stderr, "\ndatafreq = " );
+ reporterr( "\ndatafreq = " );
for( i=0; i<4; i++ )
- fprintf( stderr, "%10.5f ", datafreq[i] );
- fprintf( stderr, "\n" );
+ reporterr( "%10.5f ", datafreq[i] );
+ reporterr( "\n" );
exit( 1 );
#endif
}
int i, j, l;
int aan;
double total;
- for( i=0; i<20; i++ )
+ for( i=0; i<nscoredalphabets; i++ )
datafreq[i] = 0.0;
total = 0.0;
for( i=0; i<nseq; i++ )
for( j=0; j<l; j++ )
{
aan = amino_n[(int)seq[i][j]];
- if( aan >= 0 && aan < 20 )
+ if( aan >= 0 && aan < nscoredalphabets && seq[i][j] != '-' )
{
datafreq[aan] += 1.0;
total += 1.0;
}
}
}
- for( i=0; i<20; i++ )
- if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
+ total = 0.0; for( i=0; i<nscoredalphabets; i++ ) total += datafreq[i];
+ for( i=0; i<nscoredalphabets; i++ ) datafreq[i] /= (double)total;
+ for( i=0; i<nscoredalphabets; i++ ) if( datafreq[i] < 0.0001 ) datafreq[i] = 0.0001;
- fprintf( stderr, "datafreq = \n" );
- for( i=0; i<20; i++ )
- fprintf( stderr, "%f\n", datafreq[i] );
+// reporterr( "datafreq = \n" );
+// for( i=0; i<nscoredalphabets; i++ )
+// reporterr( "%f\n", datafreq[i] );
- total = 0.0; for( i=0; i<20; i++ ) total += datafreq[i];
- fprintf( stderr, "total = %f\n", total );
- for( i=0; i<20; i++ ) datafreq[i] /= (double)total;
+ total = 0.0; for( i=0; i<nscoredalphabets; i++ ) total += datafreq[i];
+// reporterr( "total = %f\n", total );
+ for( i=0; i<nscoredalphabets; i++ ) datafreq[i] /= (double)total;
}
+/*
+ * calcfreq_extended: count how often each scored symbol occurs in the input
+ * sequences and turn the counts into relative frequencies.
+ *
+ *   nseq      number of sequences in seq
+ *   seq       NUL-terminated sequences
+ *   datafreq  output vector with at least nscoredalphabets entries
+ *
+ * A character is counted when amino_n[] maps it to an index in
+ * [0, nscoredalphabets) and it is not the gap character '-'.  No lower bound
+ * is applied to the frequencies (the 0.0001 floor is left commented out), so
+ * symbols that never occur keep frequency 0.
+ */
+static void calcfreq_extended( int nseq, char **seq, double *datafreq )
+{
+	int s, a, idx, pass;
+	char *p;
+	double sum;
+
+	for( a=0; a<nscoredalphabets; a++ ) datafreq[a] = 0.0;
+
+	for( s=0; s<nseq; s++ )
+	{
+		for( p=seq[s]; *p; p++ )
+		{
+			if( *p == '-' ) continue;
+			idx = amino_n[(unsigned char)*p];
+			if( idx < 0 || idx >= nscoredalphabets ) continue;
+			datafreq[idx] += 1.0;
+		}
+	}
+
+//	for( a=0; a<nscoredalphabets; a++ ) if( datafreq[a] < 0.0001 ) datafreq[a] = 0.0001;
+
+	/* Normalise so the entries sum to 1.  The normalisation is carried out
+	   twice, exactly as before; the second pass divides by a sum that is
+	   already (numerically) 1. */
+	for( pass=0; pass<2; pass++ )
+	{
+		sum = 0.0;
+		for( a=0; a<nscoredalphabets; a++ ) sum += datafreq[a];
+//		reporterr( "total = %f\n", sum );
+		for( a=0; a<nscoredalphabets; a++ ) datafreq[a] /= (double)sum;
+	}
+
+#if 0
+	reporterr( "datafreq = \n" );
+	for( a=0; a<nscoredalphabets; a++ )
+		reporterr( "%d %c %f\n", a, amino[a], datafreq[a] );
+#endif
+}
+
+/*
+ * generatenuc1pam: build the 4x4 one-step nucleotide substitution matrix.
+ *
+ *   pam1     output, 4x4 matrix; every entry is overwritten
+ *   kimuraR  relative rate given to the index pairs (0,1) and (2,3); every
+ *            other off-diagonal pair has rate 1
+ *   freq     frequencies of the four nucleotides, indexed like pam1
+ *
+ * Off-diagonal entries are rate[i][j] * freq[j], scaled so that the
+ * frequency-weighted total of the row mutabilities is 0.01.  Each diagonal
+ * entry is 1 minus the scaled mutability of its row.
+ */
+static void generatenuc1pam( double **pam1, int kimuraR, double *freq )
+{
+	int row, col;
+	double rate[4][4], mutability[4], weighted, rowsum, scale;
+
+	/* indices differing only in the lowest bit form the kimuraR pairs */
+	for( row=0; row<4; row++ ) for( col=0; col<4; col++ )
+	{
+		if( row == col ) rate[row][col] = 0.0;
+		else if( ( row ^ col ) == 1 ) rate[row][col] = kimuraR;
+		else rate[row][col] = 1.0;
+	}
+
+	weighted = 0.0;
+	for( row=0; row<4; row++ )
+	{
+		rowsum = 0.0;
+		for( col=0; col<4; col++ ) rowsum += rate[row][col] * freq[col];
+		mutability[row] = rowsum;
+		weighted += rowsum * freq[row];
+	}
+
+	scale = 0.01 / weighted;
+	for( row=0; row<4; row++ )
+	{
+		for( col=0; col<4; col++ )
+		{
+			if( row == col ) pam1[row][col] = 1.0 - scale * mutability[row];
+			else pam1[row][col] = scale * rate[row][col] * freq[col];
+		}
+	}
+}
+
+
void constants( int nseq, char **seq )
{
int i, j, x;
// double tmp;
+ char shiftmodel[100];
+ int charsize;
+
+ if( nblosum < 0 ) dorp = 'p';
+
+ if( penalty_shift_factor >= 10 ) trywarp = 0;
+ else trywarp = 1;
if( dorp == 'd' ) /* DNA */
{
double **pam1 = AllocateDoubleMtx( 4, 4 );
double *freq = AllocateDoubleVec( 4 );
+ nalphabets = 26;
+ nscoredalphabets = 10;
+ charsize = 0x80;
+
+ n_dis = AllocateIntMtx( nalphabets, nalphabets );
+ n_disLN = AllocateDoubleMtx( nalphabets, nalphabets );
scoremtx = -1;
if( RNAppenalty == NOTSPECIFIED ) RNAppenalty = DEFAULTRNAGOP_N;
if( RNAppenalty_ex == NOTSPECIFIED ) RNAppenalty_ex = DEFAULTRNAGEP_N;
if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_N;
+ if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty;
if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_N;
if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_N;
if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_N;
RNApenalty = (int)( 3 * 600.0 / 1000.0 * RNAppenalty + 0.5 );
RNApenalty_ex = (int)( 3 * 600.0 / 1000.0 * RNAppenalty_ex + 0.5 );
-// fprintf( stderr, "DEFAULTRNAGOP_N = %d\n", DEFAULTRNAGOP_N );
-// fprintf( stderr, "RNAppenalty = %d\n", RNAppenalty );
-// fprintf( stderr, "RNApenalty = %d\n", RNApenalty );
+// reporterr( "DEFAULTRNAGOP_N = %d\n", DEFAULTRNAGOP_N );
+// reporterr( "RNAppenalty = %d\n", RNAppenalty );
+// reporterr( "RNApenalty = %d\n", RNApenalty );
RNAthr = (int)( 3 * 600.0 / 1000.0 * RNApthr + 0.5 );
penalty = (int)( 3 * 600.0 / 1000.0 * ppenalty + 0.5);
+ penalty_dist = (int)( 3 * 600.0 / 1000.0 * ppenalty_dist + 0.5);
+ penalty_shift = (int)( penalty_shift_factor * penalty );
penalty_OP = (int)( 3 * 600.0 / 1000.0 * ppenalty_OP + 0.5);
penalty_ex = (int)( 3 * 600.0 / 1000.0 * ppenalty_ex + 0.5);
penalty_EX = (int)( 3 * 600.0 / 1000.0 * ppenalty_EX + 0.5);
- offset = (int)( 3 * 600.0 / 1000.0 * poffset + 0.5);
- offsetFFT = (int)( 3 * 600.0 / 1000.0 * (-0) + 0.5);
- offsetLN = (int)( 3 * 600.0 / 1000.0 * 100 + 0.5);
+ offset = (int)( 1 * 600.0 / 1000.0 * poffset + 0.5);
+ offsetFFT = (int)( 1 * 600.0 / 1000.0 * (-0) + 0.5);
+ offsetLN = (int)( 1 * 600.0 / 1000.0 * 100 + 0.5);
penaltyLN = (int)( 3 * 600.0 / 1000.0 * -2000 + 0.5);
penalty_exLN = (int)( 3 * 600.0 / 1000.0 * -100 + 0.5);
- sprintf( modelname, "%s%d (%d), %6.3f (%6.3f), %6.3f (%6.3f)", rnakozo?"RNA":"DNA", pamN, kimuraR,
- -(double)ppenalty*0.001, -(double)ppenalty*0.003, -(double)poffset*0.001, -(double)poffset*0.003 );
+
+ if( trywarp ) sprintf( shiftmodel, "%4.2f (%4.2f)", -(double)penalty_shift/1800, -(double)penalty_shift/600 );
+ else sprintf( shiftmodel, "noshift" );
+
+ sprintf( modelname, "%s%d (%d), %4.2f (%4.2f), %4.2f (%4.2f), %s", rnakozo?"RNA":"DNA", pamN, kimuraR, -(double)ppenalty*0.001, -(double)ppenalty*0.003, -(double)poffset*0.001, -(double)poffset*0.003, shiftmodel );
+
+ for( i=0; i<26; i++ ) amino[i] = locaminon[i];
+ for( i=0; i<0x80; i++ ) amino_n[i] = -1;
+ for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
+ if( fmodel == 1 )
+ {
+ calcfreq_nuc( nseq, seq, freq );
+ reporterr( "a, freq[0] = %f\n", freq[0] );
+ reporterr( "g, freq[1] = %f\n", freq[1] );
+ reporterr( "c, freq[2] = %f\n", freq[2] );
+ reporterr( "t, freq[3] = %f\n", freq[3] );
+ }
+ else
+ {
+ freq[0] = 0.25;
+ freq[1] = 0.25;
+ freq[2] = 0.25;
+ freq[3] = 0.25;
+ }
+
if( kimuraR == 9999 )
{
average /= 16.0;
if( disp )
- fprintf( stderr, "average = %f\n", average );
+ reporterr( "average = %f\n", average );
for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
pamx[i][j] -= average;
}
else
{
+#if 0
double f = 0.99;
double s = (double)kimuraR / ( 2 + kimuraR ) * 0.01;
double v = (double)1 / ( 2 + kimuraR ) * 0.01;
pam1[1][0] = s; pam1[1][1] = f; pam1[1][2] = v; pam1[1][3] = v;
pam1[2][0] = v; pam1[2][1] = v; pam1[2][2] = f; pam1[2][3] = s;
pam1[3][0] = v; pam1[3][1] = v; pam1[3][2] = s; pam1[3][3] = f;
+#else
+ generatenuc1pam( pam1, kimuraR, freq );
+#endif
- fprintf( stderr, "generating %dPAM scoring matrix for nucleotides ... ", pamN );
+ reporterr( "generating a scoring matrix for nucleotide (dist=%d) ... ", pamN );
if( disp )
{
- fprintf( stderr, " TPM \n" );
+ reporterr( " TPM \n" );
for( i=0; i<4; i++ )
{
for( j=0; j<4; j++ )
- fprintf( stderr, "%+#6.10f", pam1[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+#6.10f", pam1[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
MtxuntDouble( pamx, 4 );
for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 4 );
+
+ if( disp )
+ {
+ reporterr( " TPM \n" );
+ for( i=0; i<4; i++ )
+ {
+ for( j=0; j<4; j++ )
+ reporterr( "%+#6.10f", pamx[i][j] );
+ reporterr( "\n" );
+ }
+ reporterr( "\n" );
+ }
+
for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
- pamx[i][j] /= 1.0 / 4.0;
+ pamx[i][j] /= freq[j];
+// pamx[i][j] /= 0.25;
for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
{
if( pamx[i][j] == 0.0 )
{
- fprintf( stderr, "WARNING: pamx[i][j] = 0.0 ?\n" );
+ reporterr( "WARNING: pamx[i][j] = 0.0 ?\n" );
pamx[i][j] = 0.00001; /* by J. Thompson */
}
pamx[i][j] = log10( pamx[i][j] ) * 1000.0;
if( disp )
{
- fprintf( stderr, " after log\n" );
+ reporterr( " after log\n" );
for( i=0; i<4; i++ )
{
for( j=0; j<4; j++ )
- fprintf( stderr, "%+#6.10f", pamx[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+10.6f ", pamx[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
// ?????
- for( i=0; i<26; i++ ) amino[i] = locaminon[i];
- for( i=0; i<0x80; i++ ) amino_n[i] = -1;
- for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
- if( fmodel == 1 )
- calcfreq_nuc( nseq, seq, freq );
- else
- {
- freq[0] = 0.25;
- freq[1] = 0.25;
- freq[2] = 0.25;
- freq[3] = 0.25;
- }
-// fprintf( stderr, "a, freq[0] = %f\n", freq[0] );
-// fprintf( stderr, "g, freq[1] = %f\n", freq[1] );
-// fprintf( stderr, "c, freq[2] = %f\n", freq[2] );
-// fprintf( stderr, "t, freq[3] = %f\n", freq[3] );
-
average = 0.0;
for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
- pamx[i][j] -= offset; /* extending gap cost */
+ pamx[i][j] -= offset;
for( i=0; i<4; i++ ) for( j=0; j<4; j++ )
pamx[i][j] = shishagonyuu( pamx[i][j] );
if( disp )
{
- fprintf( stderr, " after shishagonyuu\n" );
+ reporterr( " after shishagonyuu\n" );
for( i=0; i<4; i++ )
{
for( j=0; j<4; j++ )
- fprintf( stderr, "%+#6.10f", pamx[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+#6.10f", pamx[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
- fprintf( stderr, "done\n" );
+ reporterr( "done\n" );
}
for( i=0; i<5; i++ )
if( disp )
{
- fprintf( stderr, " before dis\n" );
+ reporterr( " before dis\n" );
for( i=0; i<4; i++ )
{
for( j=0; j<4; j++ )
- fprintf( stderr, "%+#6.10f", pamx[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+#6.10f", pamx[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
if( disp )
{
- fprintf( stderr, " score matrix \n" );
+ reporterr( " score matrix \n" );
for( i=0; i<4; i++ )
{
for( j=0; j<4; j++ )
- fprintf( stderr, "%+#6.10f", pamx[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+#6.10f", pamx[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
+ exit( 1 );
}
for( i=0; i<26; i++ ) amino[i] = locaminon[i];
for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpn[i];
for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
for( i=0; i<10; i++ ) for( j=0; j<10; j++ ) n_dis[i][j] = shishagonyuu( pamx[i][j] );
+
+ ambiguousscore( amino_n, n_dis );
+ if( nwildcard ) nscore( amino_n, n_dis );
+
if( disp )
{
- fprintf( stderr, " score matrix \n" );
+ reporterr( " score matrix \n" );
for( i=0; i<26; i++ )
{
for( j=0; j<26; j++ )
- fprintf( stderr, "%+6d", n_dis[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+6d", n_dis[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
+ reporterr( "penalty = %d, penalty_ex = %d\n", penalty, penalty_ex );
+//exit( 1 );
}
// RIBOSUM
if( disp )
{
- fprintf( stderr, "ribosum after shishagonyuu\n" );
+ reporterr( "ribosum after shishagonyuu\n" );
for( i=0; i<4; i++ )
{
for( j=0; j<4; j++ )
- fprintf( stderr, "%+#6.10f", ribosum4[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+#6.10f", ribosum4[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
- fprintf( stderr, "ribosum16 after shishagonyuu\n" );
+ reporterr( "\n" );
+ reporterr( "ribosum16 after shishagonyuu\n" );
for( i=0; i<16; i++ )
{
for( j=0; j<16; j++ )
- fprintf( stderr, "%+#7.0f", ribosum16[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+#7.0f", ribosum16[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
- fprintf( stderr, "done\n" );
+// reporterr( "done\n" );
#if 1
for( i=0; i<37; i++ ) for( j=0; j<37; j++ ) ribosumdis[i][j] = 0.0; //iru
if( disp )
{
- fprintf( stderr, "ribosumdis\n" );
+ reporterr( "ribosumdis\n" );
for( i=0; i<37; i++ )
{
for( j=0; j<37; j++ )
- fprintf( stderr, "%+5d", ribosumdis[i][j] );
- fprintf( stderr, "\n" );
+ reporterr( "%+5d", ribosumdis[i][j] );
+ reporterr( "\n" );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
- fprintf( stderr, "done\n" );
+// reporterr( "done\n" );
#endif
FreeDoubleMtx( pam1 );
free( freq );
}
- else if( dorp == 'p' && scoremtx == 1 ) /* Blosum */
+ else if( dorp == 'p' && scoremtx == 1 && nblosum == -2 ) /* extended */
{
double *freq;
double *freq1;
// double tmp;
double **n_distmp;
+ nalphabets = 0x100;
+ nscoredalphabets = 0x100;
+ charsize = 0x100;
+
+ reporterr( "nalphabets = %d\n", nalphabets );
+
+ n_dis = AllocateIntMtx( nalphabets, nalphabets );
+ n_disLN = AllocateDoubleMtx( nalphabets, nalphabets );
+ n_distmp = AllocateDoubleMtx( nalphabets, nalphabets );
+ datafreq = AllocateDoubleVec( nalphabets );
+ freq = AllocateDoubleVec( nalphabets );
+
+ if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_B;
+ if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty;
+ if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_B;
+ if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_B;
+ if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_B;
+ if( poffset == NOTSPECIFIED ) poffset = DEFAULTOFS_B;
+ if( pamN == NOTSPECIFIED ) pamN = 0;
+ if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
+ penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
+ penalty_dist = (int)( 600.0 / 1000.0 * ppenalty_dist + 0.5 );
+ penalty_shift = (int)( penalty_shift_factor * penalty );
+ penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
+ penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
+ penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
+ offset = (int)( 600.0 / 1000.0 * poffset + 0.5 );
+ offsetFFT = (int)( 600.0 / 1000.0 * (-0) + 0.5);
+ offsetLN = (int)( 600.0 / 1000.0 * 100 + 0.5);
+ penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
+ penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
+
+ extendedmtx( n_distmp, freq, amino, amino_grp );
+
+ if( trywarp ) sprintf( shiftmodel, "%4.2f", -(double)penalty_shift/600 );
+ else sprintf( shiftmodel, "noshift" );
+
+ sprintf( modelname, "Extended, %4.2f, %+4.2f, %+4.2f, %s", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000, shiftmodel );
+#if 0
+ for( i=0; i<26; i++ ) amino[i] = locaminod[i];
+ for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i];
+ for( i=0; i<0x80; i++ ) amino_n[i] = 0;
+ for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
+#endif
+ for( i=0; i<0x100; i++ )amino_n[i] = -1;
+ for( i=0; i<nalphabets; i++)
+ {
+ amino_n[(unsigned char)amino[i]] = i;
+// reporterr( "i=%d, amino = %c, amino_n = %d\n", i, amino[i], amino_n[amino[i]] );
+ }
+ if( fmodel == 1 )
+ {
+ calcfreq_extended( nseq, seq, datafreq );
+ freq1 = datafreq;
+ }
+ else
+ freq1 = freq;
+
+#if TEST
+ reporterr( "raw scoreing matrix : \n" );
+ for( i=0; i<nalphabets; i++ )
+ {
+ for( j=0; j<nalphabets; j++ )
+ {
+ fprintf( stdout, "%6.2f", n_distmp[i][j] );
+ }
+ fprintf( stdout, "\n" );
+ }
+#endif
+ if( fmodel == -1 )
+ average = 0.0;
+ else
+ {
+ for( i=0; i<nalphabets; i++ )
+#if TEST
+ fprintf( stdout, "freq[%c] = %f, datafreq[%c] = %f, freq1[] = %f\n", amino[i], freq[i], amino[i], datafreq[i], freq1[i] );
+#endif
+ average = 0.0;
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
+ average += n_distmp[i][j] * freq1[i] * freq1[j];
+ }
+#if TEST
+ fprintf( stdout, "####### average2 = %f\n", average );
+#endif
+
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
+ n_distmp[i][j] -= average;
+#if TEST
+ fprintf( stdout, "average2 = %f\n", average );
+ fprintf( stdout, "after average substruction : \n" );
+ for( i=0; i<nalphabets; i++ )
+ {
+ for( j=0; j<nalphabets; j++ )
+ {
+ fprintf( stdout, "%6.2f", n_distmp[i][j] );
+ }
+ fprintf( stdout, "\n" );
+ }
+#endif
+
+ average = 0.0;
+ for( i=0; i<nalphabets; i++ )
+ average += n_distmp[i][i] * freq1[i];
+#if TEST
+ fprintf( stdout, "####### average1 = %f\n", average );
+#endif
+
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
+ n_distmp[i][j] *= 600.0 / average;
+#if TEST
+ fprintf( stdout, "after average division : \n" );
+ for( i=0; i<nalphabets; i++ )
+ {
+ for( j=0; j<=i; j++ )
+ {
+ fprintf( stdout, "%7.1f", n_distmp[i][j] );
+ }
+ fprintf( stdout, "\n" );
+ }
+#endif
+
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
+ n_distmp[i][j] -= offset;
+#if TEST
+ fprintf( stdout, "after offset substruction (offset = %d): \n", offset );
+ for( i=0; i<nalphabets; i++ )
+ {
+ for( j=0; j<=i; j++ )
+ {
+ fprintf( stdout, "%7.1f", n_distmp[i][j] );
+ }
+ fprintf( stdout, "\n" );
+ }
+#endif
+#if 0
+/* Caution !!!!!!!!!! */
+ penalty -= offset;
+#endif
+
+
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
+ n_distmp[i][j] = shishagonyuu( n_distmp[i][j] );
+
+ if( disp )
+ {
+ fprintf( stdout, "freq = \n" );
+ for( i=0; i<nalphabets; i++ ) fprintf( stdout, "%c %f\n", amino[i], freq1[i] );
+ fprintf( stdout, " scoring matrix \n" );
+ for( i=0; i<nalphabets; i++ )
+ {
+ fprintf( stdout, "%c ", amino[i] );
+ for( j=0; j<nalphabets; j++ )
+ fprintf( stdout, "%5.0f", n_distmp[i][j] );
+ fprintf( stdout, "\n" );
+ }
+ fprintf( stdout, " " );
+ for( i=0; i<nalphabets; i++ )
+ fprintf( stdout, " %c", amino[i] );
+
+ average = 0.0;
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
+ average += n_distmp[i][j] * freq1[i] * freq1[j];
+ fprintf( stdout, "average = %f\n", average );
+
+ average = 0.0;
+ for( i=0; i<nalphabets; i++ )
+ average += n_distmp[i][i] * freq1[i];
+ fprintf( stdout, "itch average = %f\n", average );
+ reporterr( "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
+
+
+ exit( 1 );
+ }
+
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_dis[i][j] = 0;
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_dis[i][j] = (int)n_distmp[i][j];
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_dis[i][amino_n['-']] = n_dis[amino_n['-']][i] = 0.0;
+
+ FreeDoubleMtx( n_distmp );
+ FreeDoubleVec( datafreq );
+ FreeDoubleVec( freq );
+
+// reporterr( "done.\n" );
+
+ }
+ else if( dorp == 'p' && scoremtx == 1 ) /* Blosum, user-defined */
+ {
+ double *freq;
+ double *freq1;
+ double *datafreq;
+ double average;
+ double iaverage;
+// double tmp;
+ double **n_distmp;
+ int makeaverage0;
+
+
+ if( nblosum == 0 )
+ {
+ reporterr( "nblosum=%d??\n", nblosum );
+ exit( 1 );
+ }
+// if( nblosum < 0 )
+// {
+// nblosum *= -1;
+// makeaverage0 = 0;
+// }
+ else
+ {
+ makeaverage0 = 1;
+ }
+
+ nalphabets = 26;
+ nscoredalphabets = 20;
+ charsize = 0x80;
+
+ n_dis = AllocateIntMtx( nalphabets, nalphabets );
+ n_disLN = AllocateDoubleMtx( nalphabets, nalphabets );
n_distmp = AllocateDoubleMtx( 20, 20 );
datafreq = AllocateDoubleVec( 20 );
freq = AllocateDoubleVec( 20 );
if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_B;
+ if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty;
if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_B;
if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_B;
if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_B;
if( pamN == NOTSPECIFIED ) pamN = 0;
if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
+ penalty_dist = (int)( 600.0 / 1000.0 * ppenalty_dist + 0.5 );
+ penalty_shift = (int)( penalty_shift_factor * penalty );
penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
BLOSUMmtx( nblosum, n_distmp, freq, amino, amino_grp );
+
+ if( trywarp ) sprintf( shiftmodel, "%4.2f", -(double)penalty_shift/600 );
+ else sprintf( shiftmodel, "noshift" );
+
if( nblosum == -1 )
- sprintf( modelname, "User-defined, %6.3f, %+6.3f, %+6.3f", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000 );
+ sprintf( modelname, "User-defined, %4.2f, %+4.2f, %+4.2f, %s", -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000, shiftmodel );
else
- sprintf( modelname, "BLOSUM%d, %6.3f, %+6.3f, %+6.3f", nblosum, -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000 );
+ sprintf( modelname, "BLOSUM%d, %4.2f, %+4.2f, %+4.2f, %s", nblosum, -(double)ppenalty/1000, -(double)poffset/1000, -(double)ppenalty_ex/1000, shiftmodel );
#if 0
for( i=0; i<26; i++ ) amino[i] = locaminod[i];
for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpd[i];
else
freq1 = freq;
#if TEST
- fprintf( stderr, "raw scoreing matrix : \n" );
+ reporterr( "raw scoreing matrix : \n" );
for( i=0; i<20; i++ )
{
for( j=0; j<20; j++ )
fprintf( stdout, "####### average2 = %f\n", average );
#endif
- for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
- n_distmp[i][j] -= average;
+ if( makeaverage0 )
+ {
+ for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
+ n_distmp[i][j] -= average;
+ }
#if TEST
fprintf( stdout, "average2 = %f\n", average );
fprintf( stdout, "after average substruction : \n" );
average = 0.0;
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
average += n_distmp[i][j] * freq1[i] * freq1[j];
- fprintf( stdout, "average = %f\n", average );
+ fprintf( stdout, "\naverage = %f\n", average );
- average = 0.0;
+ iaverage = 0.0;
for( i=0; i<20; i++ )
- average += n_distmp[i][i] * freq1[i];
- fprintf( stdout, "itch average = %f\n", average );
- fprintf( stderr, "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
+ iaverage += n_distmp[i][i] * freq1[i];
+ fprintf( stdout, "itch average = %f, E=%f\n", iaverage, average/iaverage );
+ reporterr( "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
exit( 1 );
FreeDoubleVec( datafreq );
FreeDoubleVec( freq );
- fprintf( stderr, "done.\n" );
+// reporterr( "done.\n" );
}
else if( dorp == 'p' && scoremtx == 2 ) /* Miyata-Yasunaga */
{
- fprintf( stderr, "Not supported\n" );
+ reporterr( "Not supported\n" );
exit( 1 );
- for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = locn_dism[i][j];
- for( i=0; i<26; i++ ) if( i != 24 ) n_dis[i][24] = n_dis[24][i] = exgpm;
- n_dis[24][24] = 0;
- if( ppenalty == NOTSPECIFIED ) ppenalty = locpenaltym;
- if( poffset == NOTSPECIFIED ) poffset = -20;
- if( pamN == NOTSPECIFIED ) pamN = 0;
- if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
-
- penalty = ppenalty;
- offset = poffset;
-
- sprintf( modelname, "Miyata-Yasunaga, %6.3f, %6.3f", -(double)ppenalty/1000, -(double)poffset/1000 );
- for( i=0; i<26; i++ ) amino[i] = locaminom[i];
- for( i=0; i<26; i++ ) amino_grp[(int)amino[i]] = locgrpm[i];
-#if DEBUG
- fprintf( stdout, "scoreing matrix : \n" );
- for( i=0; i<26; i++ )
- {
- for( j=0; j<26; j++ )
- {
- fprintf( stdout, "%#5d", n_dis[i][j] );
- }
- fprintf( stdout, "\n" );
- }
-#endif
}
else /* JTT */
{
double *mutab;
double *datafreq;
double average;
+ double iaverage;
double tmp;
double delta;
+ int makeaverage0;
+
+ nalphabets = 26;
+ nscoredalphabets = 20;
+ charsize = 0x80;
+ n_dis = AllocateIntMtx( nalphabets, nalphabets );
+ n_disLN = AllocateDoubleMtx( nalphabets, nalphabets );
rsr = AllocateDoubleMtx( 20, 20 );
pam1 = AllocateDoubleMtx( 20, 20 );
pamx = AllocateDoubleMtx( 20, 20 );
datafreq = AllocateDoubleVec( 20 );
if( ppenalty == NOTSPECIFIED ) ppenalty = DEFAULTGOP_J;
+ if( ppenalty_dist == NOTSPECIFIED ) ppenalty_dist = ppenalty;
if( ppenalty_OP == NOTSPECIFIED ) ppenalty_OP = DEFAULTGOP_J;
if( ppenalty_ex == NOTSPECIFIED ) ppenalty_ex = DEFAULTGEP_J;
if( ppenalty_EX == NOTSPECIFIED ) ppenalty_EX = DEFAULTGEP_J;
if( pamN == NOTSPECIFIED ) pamN = DEFAULTPAMN;
if( kimuraR == NOTSPECIFIED ) kimuraR = 1;
+ if( pamN == 0 )
+ {
+ reporterr( "pamN=%d??\n", pamN );
+ exit( 1 );
+ }
+ if( pamN < 0 )
+ {
+ pamN *= -1;
+ makeaverage0 = 0;
+ }
+ else
+ {
+ makeaverage0 = 1;
+ }
+
penalty = (int)( 600.0 / 1000.0 * ppenalty + 0.5 );
+ penalty_dist = (int)( 600.0 / 1000.0 * ppenalty_dist + 0.5 );
+ penalty_shift = (int)( penalty_shift_factor * penalty );
penalty_OP = (int)( 600.0 / 1000.0 * ppenalty_OP + 0.5 );
penalty_ex = (int)( 600.0 / 1000.0 * ppenalty_ex + 0.5 );
penalty_EX = (int)( 600.0 / 1000.0 * ppenalty_EX + 0.5 );
penaltyLN = (int)( 600.0 / 1000.0 * -2000 + 0.5);
penalty_exLN = (int)( 600.0 / 1000.0 * -100 + 0.5);
- sprintf( modelname, "%s %dPAM, %6.3f, %6.3f", (TMorJTT==TM)?"Transmembrane":"JTT", pamN, -(double)ppenalty/1000, -(double)poffset/1000 );
+ if( trywarp ) sprintf( shiftmodel, "%4.2f", -(double)penalty_shift/600 );
+ else sprintf( shiftmodel, "noshift" );
+
+ sprintf( modelname, "%s %dPAM, %4.2f, %4.2f, %s", (TMorJTT==TM)?"Transmembrane":"JTT", pamN, -(double)ppenalty/1000, -(double)poffset/1000, shiftmodel );
JTTmtx( rsr, freq, amino, amino_grp, (int)(TMorJTT==TM) );
+ for( i=0; i<0x80; i++ ) amino_n[i] = -1;
+ for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
+ if( fmodel == 1 )
+ {
+ calcfreq( nseq, seq, datafreq );
+ freq1 = datafreq;
+ }
+ else
+ freq1 = freq;
+
+
#if TEST
fprintf( stdout, "rsr = \n" );
for( i=0; i<20; i++ )
}
#endif
- for( i=0; i<0x80; i++ ) amino_n[i] = -1;
- for( i=0; i<26; i++ ) amino_n[(int)amino[i]] = i;
- if( fmodel == 1 )
- {
- calcfreq( nseq, seq, datafreq );
- freq1 = datafreq;
- }
- else
- freq1 = freq;
-
- fprintf( stderr, "generating %dPAM %s scoring matrix for amino acids ... ", pamN, (TMorJTT==TM)?"Transmembrane":"JTT" );
+ reporterr( "generating %dPAM %s scoring matrix for amino acids ... ", pamN, (TMorJTT==TM)?"Transmembrane":"JTT" );
tmp = 0.0;
for( i=0; i<20; i++ )
{
mutab[i] = 0.0;
for( j=0; j<20; j++ )
- mutab[i] += rsr[i][j] * freq[j];
- tmp += mutab[i] * freq[i];
+ mutab[i] += rsr[i][j] * freq1[j];
+ tmp += mutab[i] * freq1[i];
}
#if TEST
fprintf( stdout, "mutability = \n" );
for( j=0; j<20; j++ )
{
if( i != j )
- pam1[i][j] = delta * rsr[i][j] * freq[i];
+ pam1[i][j] = delta * rsr[i][j] * freq1[j];
else
pam1[i][j] = 1.0 - delta * mutab[i];
}
for( x=0; x < pamN; x++ ) MtxmltDouble( pamx, pam1, 20 );
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
- pamx[i][j] /= freq[j];
+ pamx[i][j] /= freq1[j];
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
{
if( pamx[i][j] == 0.0 )
{
- fprintf( stderr, "WARNING: pamx[%d][%d] = 0.0?\n", i, j );
+ reporterr( "WARNING: pamx[%d][%d] = 0.0?\n", i, j );
pamx[i][j] = 0.00001; /* by J. Thompson */
}
pamx[i][j] = log10( pamx[i][j] ) * 1000.0;
fprintf( stdout, "####### average2 = %f\n", average );
#endif
- for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
- pamx[i][j] -= average;
+ if( makeaverage0 )
+ {
+ for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
+ pamx[i][j] -= average;
+ }
#if TEST
fprintf( stdout, "average2 = %f\n", average );
fprintf( stdout, "after average substruction : \n" );
average = 0.0;
for( i=0; i<20; i++ ) for( j=0; j<20; j++ )
average += pamx[i][j] * freq1[i] * freq1[j];
- fprintf( stdout, "average = %f\n", average );
+ fprintf( stdout, "\naverage = %f\n", average );
- average = 0.0;
+ iaverage = 0.0;
for( i=0; i<20; i++ )
- average += pamx[i][i] * freq1[i];
- fprintf( stdout, "itch average = %f\n", average );
- fprintf( stderr, "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
+ iaverage += pamx[i][i] * freq1[i];
+ fprintf( stdout, "itch average = %f, E=%f\n", average, average/iaverage );
+ reporterr( "parameters: %d, %d, %d\n", penalty, penalty_ex, offset );
exit( 1 );
for( i=0; i<26; i++ ) for( j=0; j<26; j++ ) n_dis[i][j] = 0;
for( i=0; i<20; i++ ) for( j=0; j<20; j++ ) n_dis[i][j] = (int)pamx[i][j];
- fprintf( stderr, "done.\n" );
+ reporterr( "done.\n" );
FreeDoubleMtx( rsr );
FreeDoubleMtx( pam1 );
FreeDoubleMtx( pamx );
FreeDoubleVec( mutab );
FreeDoubleVec( datafreq );
}
- fprintf( stderr, "scoremtx = %d\n", scoremtx );
#if DEBUG
- fprintf( stderr, "scoremtx = %d\n", scoremtx );
- fprintf( stderr, "amino[] = %s\n", amino );
+ reporterr( "scoremtx = %d\n", scoremtx );
+ reporterr( "amino[] = %s\n", amino );
#endif
- for( i=0; i<0x80; i++ )amino_n[i] = -1;
- for( i=0; i<26; i++) amino_n[(int)amino[i]] = i;
- for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis[i][j] = 0;
- for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_disLN[i][j] = 0;
- for( i=0; i<0x80; i++ ) for( j=0; j<0x80; j++ ) amino_dis_consweight_multi[i][j] = 0.0;
- for( i=0; i<26; i++) for( j=0; j<26; j++ )
+ amino_dis = AllocateIntMtx( charsize, charsize );
+ amino_dis_consweight_multi = AllocateDoubleMtx( charsize, charsize );
+
+// reporterr( "charsize=%d\n", charsize );
+
+ for( i=0; i<charsize; i++ )amino_n[i] = -1;
+ for( i=0; i<nalphabets; i++) amino_n[(int)amino[i]] = i;
+ for( i=0; i<charsize; i++ ) for( j=0; j<charsize; j++ ) amino_dis[i][j] = 0;
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ ) n_disLN[i][j] = 0;
+ for( i=0; i<charsize; i++ ) for( j=0; j<charsize; j++ ) amino_dis_consweight_multi[i][j] = 0.0;
+
+ n_dis_consweight_multi = AllocateDoubleMtx( nalphabets, nalphabets );
+ n_disFFT = AllocateIntMtx( nalphabets, nalphabets );
+ for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )
{
amino_dis[(int)amino[i]][(int)amino[j]] = n_dis[i][j];
- n_dis_consweight_multi[i][j] = (float)n_dis[i][j] * consweight_multi;
+ n_dis_consweight_multi[i][j] = (double)n_dis[i][j] * consweight_multi;
amino_dis_consweight_multi[(int)amino[i]][(int)amino[j]] = (double)n_dis[i][j] * consweight_multi;
}
if( dorp == 'd' ) /* DNA */
{
+#if 0 // ???
for( i=0; i<5; i++) for( j=0; j<5; j++ )
- amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
+ n_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;
for( i=5; i<10; i++) for( j=5; j<10; j++ )
- amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
+ n_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;
for( i=0; i<5; i++) for( j=0; j<5; j++ )
n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
for( i=5; i<10; i++) for( j=5; j<10; j++ )
n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
+#else
+ for( i=0; i<10; i++) for( j=0; j<10; j++ )
+ n_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;
+ for( i=0; i<10; i++) for( j=0; j<10; j++ )
+ n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
+#endif
}
else // protein
{
for( i=0; i<20; i++) for( j=0; j<20; j++ )
- amino_disLN[(int)amino[i]][(int)amino[j]] = n_dis[i][j] + offset - offsetLN;
+ n_disLN[i][j] = (double)n_dis[i][j] + offset - offsetLN;
for( i=0; i<20; i++) for( j=0; j<20; j++ )
n_disFFT[i][j] = n_dis[i][j] + offset - offsetFFT;
}
#if 0
- fprintf( stderr, "amino_dis (offset = %d): \n", offset );
+ reporterr( "amino_dis (offset = %d): \n", offset );
for( i=0; i<20; i++ )
{
for( j=0; j<20; j++ )
{
- fprintf( stderr, "%5d", amino_dis[(int)amino[i]][(int)amino[j]] );
+ reporterr( "%5d", amino_dis[(int)amino[i]][(int)amino[j]] );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
- fprintf( stderr, "amino_disLN (offsetLN = %d): \n", offsetLN );
+ reporterr( "amino_disLN (offsetLN = %d): \n", offsetLN );
for( i=0; i<20; i++ )
{
for( j=0; j<20; j++ )
{
- fprintf( stderr, "%5d", amino_disLN[(int)amino[i]][(int)amino[j]] );
+ reporterr( "%5d", amino_disLN[(int)amino[i]][(int)amino[j]] );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
- fprintf( stderr, "n_dis (offset = %d): \n", offset );
+ reporterr( "n_dis (offset = %d): \n", offset );
for( i=0; i<26; i++ )
{
for( j=0; j<26; j++ )
{
- fprintf( stderr, "%5d", n_dis[i][j] );
+ reporterr( "%5d", n_dis[i][j] );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
- fprintf( stderr, "n_disFFT (offsetFFT = %d): \n", offsetFFT );
+ reporterr( "n_disFFT (offsetFFT = %d): \n", offsetFFT );
for( i=0; i<26; i++ )
{
for( j=0; j<26; j++ )
{
- fprintf( stderr, "%5d", n_disFFT[i][j] );
+ reporterr( "%5d", n_disFFT[i][j] );
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
}
exit( 1 );
#endif
#endif
}
}
+
+/*
+ * Release the globally held scoring matrices.
+ * Each matrix is freed only when its pointer is non-NULL; the pointer is
+ * then set to NULL unconditionally.
+ */
+void freeconstants()
+{
+	if( n_disLN )
+		FreeDoubleMtx( n_disLN );
+	n_disLN = NULL;
+
+	if( n_dis )
+		FreeIntMtx( n_dis );
+	n_dis = NULL;
+
+	if( n_disFFT )
+		FreeIntMtx( n_disFFT );
+	n_disFFT = NULL;
+
+	if( n_dis_consweight_multi )
+		FreeDoubleMtx( n_dis_consweight_multi );
+	n_dis_consweight_multi = NULL;
+
+	if( amino_dis )
+		FreeIntMtx( amino_dis );
+	amino_dis = NULL;
+
+	if( amino_dis_consweight_multi )
+		FreeDoubleMtx( amino_dis_consweight_multi );
+	amino_dis_consweight_multi = NULL;
+}
char *pt;
int i;
int left, right;
- float prob;
+ double prob;
pairnum = (int *)calloc( length, sizeof( int ) );
for( i=0; i<length; i++ ) pairnum[i] = 0;
{
pt++;
// fprintf( stderr, "pt=%s\n", pt );
- sscanf( pt, "%d:%f", &right, &prob );
+ sscanf( pt, "%d:%lf", &right, &prob );
right--;
// fprintf( stderr, "%d-%d, %f\n", left, right, prob );
RNApair *pairprobpt;
RNApair *pt;
int *alnpairnum;
- float prob;
+ double prob;
int adpos;
arguments( argc, argv );
for( i=0; i<nlenmax; i++ ) for( pairprobpt=alnpairprob[i]; pairprobpt->bestpos!=-1; pairprobpt++ )
{
- pairprobpt->bestscore /= (float)njob;
+ pairprobpt->bestscore /= (double)njob;
left = i;
right = pairprobpt->bestpos;
prob = pairprobpt->bestscore;
{
case 'i':
inputfile = *++argv;
- fprintf( stderr, "inputfile = %s\n", inputfile );
+// fprintf( stderr, "inputfile = %s\n", inputfile );
--argc;
goto nextoption;
default:
{
FILE *infp;
int nlenmin;
+ double nfreq;
arguments( argc, argv );
infp = stdin;
dorp = NOTSPECIFIED;
- getnumlen_nogap( infp, &nlenmin );
+ getnumlen_nogap_countn( infp, &nlenmin, &nfreq );
- fprintf( stdout, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp );
+ fprintf( stdout, "%d x %d - %d %c nfreq=%f\n", njob, nlenmax, nlenmin, dorp, nfreq );
+
+ fclose( infp );
return( 0 );
}
int parallelizationstrategy = BAATARI1;
-char modelname[100];
+char modelname[500];
int njob, nlenmax;
-int amino_n[0x80];
-char amino_grp[0x80];
-int amino_dis[0x80][0x80];
-int amino_disLN[0x80][0x80];
-double amino_dis_consweight_multi[0x80][0x80];
-int n_dis[26][26];
-int n_disFFT[26][26];
-float n_dis_consweight_multi[26][26];
-char amino[26];
-double polarity[20];
-double volume[20];
+int amino_n[0x100];
+char amino_grp[0x100];
+//int amino_dis[0x100][0x100];
+int **amino_dis = NULL;
+double **n_disLN = NULL;
+//double amino_dis_consweight_multi[0x100][0x100];
+double **amino_dis_consweight_multi = NULL;
+int **n_dis = NULL;
+int **n_disFFT = NULL;
+double **n_dis_consweight_multi = NULL;
+unsigned char amino[0x100];
+double polarity[0x100];
+double volume[0x100];
int ribosumdis[37][37];
int ppid;
int nblosum; // 45, 50, 62, 80
int kobetsubunkatsu;
int bunkatsu;
-int dorp;
+int dorp = NOTSPECIFIED; // arguments de shitei suruto, tbfast -> pairlocalalign no yobidashi de futsugou
int niter;
int contin;
int calledByXced;
int check;
double cut;
int cooling;
+int trywarp = 0;
int penalty, ppenalty, penaltyLN;
+int penalty_dist, ppenalty_dist;
int RNApenalty, RNAppenalty;
int RNApenalty_ex, RNAppenalty_ex;
int penalty_ex, ppenalty_ex, penalty_exLN;
int penalty_EX, ppenalty_EX;
int penalty_OP, ppenalty_OP;
+int penalty_shift, ppenalty_shift;
+double penalty_shift_factor = 100.0;
int RNAthr, RNApthr;
int offset, poffset, offsetLN, offsetFFT;
int scoremtx;
int disopt;
int pamN;
int checkC;
-float geta2;
+double geta2;
int treemethod;
int kimuraR;
char *swopt;
int rnakozo;
char rnaprediction;
int scoreout = 0;
+int spscoreout = 0;
int outnumber = 0;
+int legacygapcost = 0;
+double minimumweight = 0.0005;
+int nwildcard = 0;
char *signalSM;
FILE *prep_g;
char **seq_g;
char **res_g;
-float consweight_multi = 1.0;
-float consweight_rna = 0.0;
+double consweight_multi = 1.0;
+double consweight_rna = 0.0;
char RNAscoremtx = 'n';
-char *newgapstr = "-";
+char TLS *newgapstr = "-";
+
+int nalphabets = 26;
+int nscoredalphabets = 20;
+
+double specificityconsideration = 0.0;
+int ndistclass = 10;
+int maxdistclass = -1;
+
+int gmsg = 0;
+
+double sueff_global = SUEFF;
+
+double lenfaca, lenfacb, lenfacc, lenfacd;
+int maxl, tsize;
+
+/*
+ * Assign fixed default values to a set of global variables.
+ * Takes no arguments and returns nothing; its only effect is the
+ * assignments below.
+ *
+ * NOTE(review): spscoreout, minimumweight, nwildcard, sueff_global and dorp
+ * also carry initializers at their declarations but are not reassigned
+ * here -- confirm whether that omission is intentional.
+ */
+void initglobalvariables()
+{
+ commonAlloc1 = 0;
+ commonAlloc2 = 0;
+ commonIP = NULL;
+ commonJP = NULL;
+ nthread = 1;
+ randomseed = 0;
+ parallelizationstrategy = BAATARI1;
+
+ trywarp = 0;
+ penalty_shift_factor = 100.0;
+ outgap = 1;
+ addprofile = 1;
+ scoreout = 0;
+ outnumber = 0;
+ legacygapcost = 0;
+ consweight_multi = 1.0;
+ consweight_rna = 0.0;
+ RNAscoremtx = 'n';
+
+ newgapstr = "-";
+
+ nalphabets = 26;
+ nscoredalphabets = 20;
+
+ specificityconsideration = 0.0;
+ ndistclass = 10;
+ maxdistclass = -1;
+
+ gmsg = 0;
+}
#include "mltaln.h"
+
+
#define DEBUG 0
#define IODEBUG 0
#define SCOREOUT 0
+#define SKIP 1
#define END_OF_VEC -1
static int treeout;
static int noalign;
static int distout;
-static float lenfaca, lenfacb, lenfacc, lenfacd;
+static int tuplesize;
+static int subalignment;
+static int subalignmentoffset;
+static int nguidetree;
+static int sparsepickup;
+static int keeplength;
+static int ndeleted;
+static int mapout;
+static int smoothing;
+static int compacttree = 0;
+static double maxdistmtxsize;
+
#if 0
#define PLENFACA 0.0123
#define PLENFACB 10252
#define PLENFACB 10000
#define PLENFACC 10000
#define PLENFACD 0.1
-#define DLENFACA 0.01
-#define DLENFACB 2500
-#define DLENFACC 2500
-#define DLENFACD 0.1
+#define D6LENFACA 0.01
+#define D6LENFACB 2500
+#define D6LENFACC 2500
+#define D6LENFACD 0.1
+#define D10LENFACA 0.01
+#define D10LENFACB 1000000
+#define D10LENFACC 1000000
+#define D10LENFACD 0.0
+#endif
+
+typedef struct _jobtable
+{
+ int i;
+ int j;
+} Jobtable;
+
+/* Argument bundle read by msacompactdisthalfmtxthread(). */
+typedef struct _msacompactdistmtxthread_arg
+{
+ int njob; // number of sequences; the worker stops when the row index reaches njob-1
+ int thread_no; // used only in the progress message
+ int *selfscore; // selfscore[i], selfscore[j] are passed to distcompact_msa()
+ double **partmtx; // rows may be NULL; non-NULL rows receive the pairwise distance
+ char **seq; // seq[i], seq[j] are passed to distcompact_msa()
+ int **skiptable; // skiptable[i], skiptable[j] are passed to distcompact_msa()
+ double *mindist; // per sequence: smallest (distance + preference) seen so far
+ int *mindistfrom; // per sequence: index of the partner that gave mindist
+ int *jobpospt; // shared counter holding the next row index to process
+#ifdef enablemultithread
+ pthread_mutex_t *mutex; // locked while *jobpospt is read and advanced (when nthread is nonzero)
+#endif
+} msacompactdistmtxthread_arg_t;
+
+/* Argument bundle read by compactdisthalfmtxthread(). */
+typedef struct _compactdistmtxthread_arg
+{
+ int njob; // number of sequences; the worker stops when the row index reaches njob-1
+ int thread_no; // used only in the progress message
+ int *nogaplen; // nogaplen[i], nogaplen[j] are passed to distcompact()
+ int **pointt; // pointt[i] feeds makecompositiontable_p(); pointt[j] is passed to distcompact()
+ int *selfscore; // selfscore[i], selfscore[j] are passed to distcompact()
+ double **partmtx; // rows may be NULL; non-NULL rows receive the pairwise distance
+ int *jobpospt; // shared counter holding the next row index to process
+ double *mindist; // per sequence: smallest (distance + preference) seen so far
+ int *mindistfrom; // per sequence: index of the partner that gave mindist
+#ifdef enablemultithread
+ pthread_mutex_t *mutex; // locked while *jobpospt is read and advanced (when nthread is nonzero)
+#endif
+} compactdistmtxthread_arg_t;
+
+typedef struct _msadistmtxthread_arg
+{
+ int njob;
+ int thread_no;
+ int *selfscore;
+ double **iscore;
+ double **partmtx;
+ char **seq;
+ int **skiptable;
+ Jobtable *jobpospt;
+#ifdef enablemultithread
+ pthread_mutex_t *mutex;
#endif
+} msadistmtxthread_arg_t;
#ifdef enablemultithread
+// ue futatsu ha singlethread demo tsukau
typedef struct _treebasethread_arg
{
int thread_no;
double *effarr;
int *alloclenpt;
int *fftlog;
+ char *mergeoralign;
+ double **newdistmtx;
+ int *selfscore;
pthread_mutex_t *mutex;
pthread_cond_t *treecond;
} treebasethread_arg_t;
int njob;
int *jobpospt;
int **pointt;
- float **mtx;
+ double **mtx;
pthread_mutex_t *mutex;
} distancematrixthread_arg_t;
#endif
tbrweight = 3;
checkC = 0;
treemethod = 'X';
+ sueff_global = 0.1;
contin = 0;
scoremtx = 1;
kobetsubunkatsu = 0;
dorp = NOTSPECIFIED;
+ ppenalty_dist = NOTSPECIFIED;
ppenalty = -1530;
ppenalty_ex = NOTSPECIFIED;
+ penalty_shift_factor = 1000.0;
poffset = -123;
kimuraR = NOTSPECIFIED;
pamN = NOTSPECIFIED;
fftThreshold = NOTSPECIFIED;
TMorJTT = JTT;
scoreout = 0;
+ spscoreout = 0;
+ tuplesize = 6;
+ subalignment = 0;
+ subalignmentoffset = 0;
+ legacygapcost = 0;
+ specificityconsideration = 0.0;
+ nguidetree = 1;
+ sparsepickup = 0;
+ keeplength = 0;
+ mapout = 0;
+ smoothing = 0;
+ nwildcard = 0;
while( --argc > 0 && (*++argv)[0] == '-' )
{
{
case 'i':
inputfile = *++argv;
- fprintf( stderr, "inputfile = %s\n", inputfile );
+ reporterr( "inputfile = %s\n", inputfile );
--argc;
goto nextoption;
case 'I':
- nadd = atoi( *++argv );
- fprintf( stderr, "nadd = %d\n", nadd );
+ nadd = myatoi( *++argv );
+ reporterr( "nadd = %d\n", nadd );
+ --argc;
+ goto nextoption;
+ case 'V':
+ ppenalty_dist = (int)( atof( *++argv ) * 1000 - 0.5 );
+// fprintf( stderr, "ppenalty = %d\n", ppenalty );
--argc;
goto nextoption;
case 'f':
ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
-// fprintf( stderr, "ppenalty = %d\n", ppenalty );
+// reporterr( "ppenalty = %d\n", ppenalty );
+ --argc;
+ goto nextoption;
+ case 'Q':
+ penalty_shift_factor = atof( *++argv );
--argc;
goto nextoption;
case 'g':
ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
- fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
+ reporterr( "ppenalty_ex = %d\n", ppenalty_ex );
--argc;
goto nextoption;
case 'h':
poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
-// fprintf( stderr, "poffset = %d\n", poffset );
+// reporterr( "poffset = %d\n", poffset );
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
- fprintf( stderr, "kappa = %d\n", kimuraR );
+ kimuraR = myatoi( *++argv );
+ reporterr( "kappa = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
-// fprintf( stderr, "blosum %d / kimura 200 \n", nblosum );
+// reporterr( "blosum %d / kimura 200 \n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = JTT;
- fprintf( stderr, "jtt/kimura %d\n", pamN );
+ reporterr( "jtt/kimura %d\n", pamN );
--argc;
goto nextoption;
case 'm':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = TM;
- fprintf( stderr, "tm %d\n", pamN );
+ reporterr( "tm %d\n", pamN );
--argc;
goto nextoption;
case 'C':
- nthread = atoi( *++argv );
- fprintf( stderr, "nthread = %d\n", nthread );
+ nthread = myatoi( *++argv );
+ reporterr( "nthread = %d\n", nthread );
+ --argc;
+ goto nextoption;
+ case 's':
+ specificityconsideration = (double)myatof( *++argv );
+// reporterr( "specificityconsideration = %f\n", specificityconsideration );
--argc;
goto nextoption;
#if 1
case 'T':
noalign = 1;
break;
+#if 0
case 'r':
fmodel = -1;
break;
+#endif
case 'D':
dorp = 'd';
break;
case 'P':
dorp = 'p';
break;
+ case 'L':
+ legacygapcost = 1;
+ break;
case 'e':
fftscore = 0;
break;
+ case 'H':
+ subalignment = 1;
+ subalignmentoffset = myatoi( *++argv );
+ --argc;
+ goto nextoption;
#if 0
case 'R':
fftRepeatStop = 1;
case 'n' :
outnumber = 1;
break;
+#if 0
case 's':
treemethod = 's';
break;
- case 'X':
- treemethod = 'X'; // mix
- break;
- case 'E':
- treemethod = 'E'; // upg (average)
- break;
case 'q':
treemethod = 'q'; // minimum
break;
+#endif
+ case 'q':
+ sparsepickup = myatoi( *++argv );
+// reporterr( "sparsepickup = %d\n", sparsepickup );
+ --argc;
+ goto nextoption;
+ case 'X':
+ treemethod = 'X';
+ sueff_global = atof( *++argv );
+// fprintf( stderr, "sueff_global = %f\n", sueff_global );
+ --argc;
+ goto nextoption;
+ case 'E':
+ nguidetree = myatoi( *++argv );
+// reporterr( "nguidetree = %d\n", nguidetree );
+ --argc;
+ goto nextoption;
#if 0
case 'a':
alg = 'a';
break;
-#endif
- case 'R':
- alg = 'R';
- break;
- case 'Q':
- alg = 'Q';
- break;
case 'H':
alg = 'H';
break;
+ case 'R':
+ alg = 'R';
+ break;
+#endif
case 'A':
alg = 'A';
break;
+ case '&':
+ alg = 'a';
+ break;
+ case '@':
+ alg = 'd';
+ break;
case 'N':
nevermemsave = 1;
break;
case 'M':
alg = 'M';
break;
- case 'S':
- scoreout = 1;
+#if 0
+ case 'S' :
+ scoreout = 1; // for checking parallel calculation
+ break;
+#else
+ case 'S' :
+ spscoreout = 1; // 2014/Dec/30, sp score
break;
- case 'B':
+#endif
+ case 'B': // hitsuyou! memopt -M -B no tame
break;
case 'F':
use_fft = 1;
use_fft = 1;
force_fft = 1;
break;
+#if 0
case 'V':
topin = 1;
break;
+#endif
case 'U':
treein = 1;
break;
case 'v':
tbrweight = 3;
break;
+#if 1
case 'd':
disp = 1;
break;
+#endif
#if 1
case 'O':
outgap = 0;
tbutree = 0;
break;
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
--argc;
goto nextoption;
+ case 'W':
+ tuplesize = myatoi( *++argv );
+ --argc;
+ goto nextoption;
+#if 0
case 'Z':
checkC = 1;
break;
+#endif
+ case 'Y':
+ keeplength = 1;
+ break;
+ case 'Z':
+ mapout = 1;
+ break;
+ case 'p':
+ smoothing = 1;
+ break;
+ case ':':
+ nwildcard = 1;
+ break;
default:
- fprintf( stderr, "illegal option %c\n", c );
+ reporterr( "illegal option %c\n", c );
argc = 0;
break;
}
}
if( argc != 0 )
{
- fprintf( stderr, "options: Check source file !\n" );
+ reporterr( "options: Check source file !\n" );
exit( 1 );
}
if( tbitr == 1 && outgap == 0 )
{
- fprintf( stderr, "conflicting options : o, m or u\n" );
+ reporterr( "conflicting options : o, m or u\n" );
exit( 1 );
}
}
+/*
+ * Estimate how variable the pairwise scores are within a sample of the input.
+ *
+ * nseq    : number of sequences in seq
+ * npick   : number of sequences to sample (the first npick entries of the
+ *           index list after stringshuffle(), presumably a random shuffle)
+ * nlenmax : length used to size the working copies and passed to the scorer
+ * seq     : input sequences; gappick0() copies each picked one into a work buffer
+ * seed    : value handed to srand() before the shuffle
+ *
+ * Every pair of picked sequences is scored with G__align11_noalign(); each
+ * score, then the mean, standard deviation and coefficient of variation,
+ * are printed to stdout.
+ *
+ * Returns 's' when stddev/mean < 0.2, otherwise 't'.
+ *
+ * NOTE(review): npick <= 2 makes npair or npair-1 zero, so the statistics
+ * become inf/NaN; callers presumably pass a larger npick -- confirm.
+ */
+static int varpairscore( int nseq, int npick, int nlenmax, char **seq, int seed )
+{
+	int i, j, npair;
+	int *slist;
+	char **pickseq;
+	double score;
+	double scoreav;
+	double scoreav2;
+	double scorestd;
+	double scorevar;
+	slist = calloc( nseq, sizeof( int ) );
+	if( !slist ) ErrorExit( "Cannot allocate slist\n" ); // the original wrote through an unchecked pointer
+	pickseq = AllocateCharMtx( npick, nlenmax );
+	reporterr( "nseq = %d, nlenmax=%d, seed=%d\n", nseq, nlenmax, seed );
+
+	srand( seed );
+
+	for( i=0; i<nseq; i++ ) slist[i] = i;
+//	for( i=0; i<nseq; i++ ) reporterr( "slist[%d] = %d\n", i, slist[i] );
+
+	stringshuffle( slist, nseq );
+	for( i=0; i<npick; i++ ) gappick0( pickseq[i], seq[slist[i]] );
+
+	scoreav = 0.0;
+	scoreav2 = 0.0;
+	npair = npick * (npick-1) / 2;
+	for( i=1; i<npick; i++ )
+	{
+		reporterr( "%d / %d\r", i, npick );
+		for( j=0; j<i; j++ )
+		{
+			score = G__align11_noalign( n_dis_consweight_multi, -1200, -60, pickseq+i, pickseq+j, nlenmax );
+			scoreav += score;
+			scoreav2 += score * score;
+			printf( "score = %d\n", (int)score );
+		}
+	}
+
+	scoreav /= (double)npair;
+	scoreav2 /= (double)npair;
+	// variance scaled by npair/(npair-1)
+	scorevar = ( scoreav2 - scoreav * scoreav )*npair/(npair-1);
+	scorestd = sqrt( scorevar );
+	printf( "av = %f\n", scoreav );
+	printf( "stddev = %f\n", scorestd );
+	printf( "cv = %f\n", scorestd/scoreav );
+
+	FreeCharMtx( pickseq );
+	free( slist ); // was leaked on every call
+
+	if( scorestd/scoreav < 0.2 ) return( 's' );
+	else return( 't' );
+}
+
+/*
+ * Select a sparse subset of sequences guided by the tree and write it out.
+ *
+ * n      : number of sequences
+ * seqlen : seqlen[m] is the length compared when looking for the longest member
+ * topol  : topol[i][0] and topol[i][1] are the -1 terminated member lists of
+ *          the two groups at tree step i (i = 0 .. n-2)
+ * name   : sequence names, indexed like seq
+ * seq    : sequences
+ *
+ * For every step the longest member of each of the two groups is recorded.
+ * Walking from the last step backwards, those members are marked as selected
+ * until the counter m reaches the file-level sparsepickup. Selected
+ * sequences are written to stdout, all others to the file "notused".
+ *
+ * Not compatible with memsave mode (translated from the original comment).
+ *
+ * NOTE(review): m is used as the number of selected sequences when writing;
+ * this assumes every step after the first marks exactly one new sequence --
+ * confirm against the tree construction.
+ */
+static void pickup( int n, int *seqlen, int ***topol, char **name, char **seq ) // not compatible with memsave
+{
+	int i, j, k, m;
+	int **longestseq;
+	int **longestlen;
+	int *select;
+	char **nameout, **seqout;
+	int *nlenout;
+	char **namenotused, **seqnotused;
+	int *nlennotused;
+	FILE *notusedfp;
+
+	longestseq = AllocateIntMtx( n-1, 2 );
+	longestlen = AllocateIntMtx( n-1, 2 );
+	select = AllocateIntVec( n );
+	for( i=0; i<n; i++ ) select[i] = 0;
+	nameout = AllocateCharMtx( n, 0 );
+	seqout = AllocateCharMtx( n, 0 );
+	nlenout = AllocateIntVec( n );
+	namenotused = AllocateCharMtx( n, 0 );
+	seqnotused = AllocateCharMtx( n, 0 );
+	nlennotused = AllocateIntVec( n );
+
+	for( i=0; i<n-1; i++ )
+	{
+//		reporterr( "STEP %d\n", i );
+		longestlen[i][0] = -1;
+		longestseq[i][0] = -1;
+		for( j=0; (m=topol[i][0][j])!=-1; j++ ) // slightly wasteful
+		{
+			if( seqlen[m] > longestlen[i][0] )
+			{
+				longestlen[i][0] = seqlen[m];
+				longestseq[i][0] = m;
+			}
+//			reporterr( "%d ", topol[i][0][j] );
+		}
+//		reporterr( "longest = %d (%d)\n", longestlen[i][0], longestseq[i][0] );
+
+
+		longestlen[i][1] = -1;
+		longestseq[i][1] = -1;
+		for( j=0; (m=topol[i][1][j])!=-1; j++ ) // slightly wasteful
+		{
+			if( seqlen[m] > longestlen[i][1] )
+			{
+				longestlen[i][1] = seqlen[m];
+				longestseq[i][1] = m;
+			}
+//			reporterr( "%d ", topol[i][1][j] );
+		}
+//		reporterr( "longest = %d (%d)\n", longestlen[i][1], longestseq[i][1] );
+	}
+
+	// Walk from the last tree step backwards, marking the longest member of each group.
+	m = 1;
+	for( i=n-2; i>-1; i-- )
+	{
+//		reporterr( "longest[%d][0] = %d (%d)\n", i, longestlen[i][0], longestseq[i][0] );
+//		reporterr( "longest[%d][1] = %d (%d)\n", i, longestlen[i][1], longestseq[i][1] );
+		select[longestseq[i][0]] = 1;
+		select[longestseq[i][1]] = 1;
+		m += 1;
+		if( m >= sparsepickup ) break;
+	}
+
+	// Partition into selected (k entries) and not selected (j entries); only pointers are copied.
+	for( i=0, k=0, j=0; i<n; i++ )
+	{
+		if( select[i] )
+		{
+			nameout[k] = name[i];
+			seqout[k] = seq[i];
+			nlenout[k] = strlen( seqout[k] );
+			k++;
+		}
+		else
+		{
+			namenotused[j] = name[i];
+			seqnotused[j] = seq[i];
+			nlennotused[j] = strlen( seqnotused[j] );
+			j++;
+		}
+	}
+	writeData_pointer( stdout, m, nameout, nlenout, seqout );
+
+	notusedfp = fopen( "notused", "w" );
+	if( !notusedfp ) // the original passed a possibly-NULL stream on to the writer and fclose()
+	{
+		reporterr( "Cannot open notused\n" );
+		exit( 1 );
+	}
+	writeData_pointer( notusedfp, n-m, namenotused, nlennotused, seqnotused );
+	fclose( notusedfp );
+
+
+	// The row arrays hold borrowed pointers, so only the arrays themselves are released.
+	free( nameout );
+	free( nlenout );
+	free( seqout );
+	free( namenotused );
+	free( nlennotused );
+	free( seqnotused );
+	FreeIntMtx( longestseq );
+	FreeIntMtx( longestlen );
+	free( select );
+}
-static int maxl;
-static int tsize;
+static int nunknown = 0;
void seq_grp_nuc( int *grp, char *seq )
{
if( tmp < 4 )
*grp++ = tmp;
else
- fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
+ nunknown++;
}
*grp = END_OF_VEC;
- if( grp - grpbk < 6 )
+ if( grp - grpbk < tuplesize )
{
-// fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
+// reporterr( "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
// exit( 1 );
*grpbk = -1;
}
int *grpbk = grp;
while( *seq )
{
- tmp = amino_grp[(int)*seq++];
+ tmp = amino_grp[(unsigned char)*seq++];
if( tmp < 6 )
*grp++ = tmp;
else
- fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
+ nunknown++;
}
*grp = END_OF_VEC;
if( grp - grpbk < 6 )
{
-// fprintf( stderr, "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
+// reporterr( "\n\nWARNING: Too short.\nPlease also consider use mafft-ginsi, mafft-linsi or mafft-ginsi.\n\n\n" );
// exit( 1 );
*grpbk = -1;
}
}
-void makecompositiontable_p( short *table, int *pointt )
+void makecompositiontable_p( int *table, int *pointt )
{
int point;
table[point]++;
}
-int commonsextet_p( short *table, int *pointt )
+
+void makepointtable_nuc_dectet( int *pointt, int *n )
{
- int value = 0;
- short tmp;
int point;
- static TLS short *memo = NULL;
- static TLS int *ct = NULL;
- static TLS int *cp;
+ register int *p;
- if( table == NULL )
+ if( *n == -1 )
{
- if( memo ) free( memo );
- if( ct ) free( ct );
- return( 0 );
+ *pointt = -1;
+ return;
}
- if( *pointt == -1 )
- return( 0 );
+ p = n;
+ point = *n++ *262144;
+ point += *n++ * 65536;
+ point += *n++ * 16384;
+ point += *n++ * 4096;
+ point += *n++ * 1024;
+ point += *n++ * 256;
+ point += *n++ * 64;
+ point += *n++ * 16;
+ point += *n++ * 4;
+ point += *n++;
+ *pointt++ = point;
- if( !memo )
+ while( *n != END_OF_VEC )
{
- memo = (short *)calloc( tsize, sizeof( short ) );
- if( !memo ) ErrorExit( "Cannot allocate memo\n" );
- ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) );
- if( !ct ) ErrorExit( "Cannot allocate memo\n" );
+ point -= *p++ *262144;
+ point *= 4;
+ point += *n++;
+ *pointt++ = point;
+
}
+ *pointt = END_OF_VEC;
+}
- cp = ct;
- while( ( point = *pointt++ ) != END_OF_VEC )
+void makepointtable_nuc_octet( int *pointt, int *n )
+{
+ int point;
+ register int *p;
+
+ if( *n == -1 )
{
- tmp = memo[point]++;
- if( tmp < table[point] )
- value++;
- if( tmp == 0 ) *cp++ = point;
+ *pointt = -1;
+ return;
}
- *cp = END_OF_VEC;
-
- cp = ct;
- while( *cp != END_OF_VEC )
- memo[*cp++] = 0;
- return( value );
+ p = n;
+ point = *n++ * 16384;
+ point += *n++ * 4096;
+ point += *n++ * 1024;
+ point += *n++ * 256;
+ point += *n++ * 64;
+ point += *n++ * 16;
+ point += *n++ * 4;
+ point += *n++;
+ *pointt++ = point;
+
+ while( *n != END_OF_VEC )
+ {
+ point -= *p++ * 16384;
+ point *= 4;
+ point += *n++;
+ *pointt++ = point;
+ }
+ *pointt = END_OF_VEC;
}
void makepointtable_nuc( int *pointt, int *n )
*pointt = END_OF_VEC;
}
-#ifdef enablemultithread
-static void *distancematrixthread( void *arg )
+// Returns a tiny offset proportional to how far pos lies after ori, wrapping
+// around at max: 1e-14 * ((pos - ori), plus max when that difference is negative).
+// In this file the result is added to a distance before it is compared with mindist.
+static double preferenceval( int ori, int pos, int max ) // for debug
 {
-	distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
-	int thread_no = targ->thread_no;
+ pos -= ori;
+ if( pos < 0 ) pos += max;
+ return( 0.00000000000001 * pos );
+}
+
+static void *compactdisthalfmtxthread( void *arg ) // enablemultithread == 0 demo tsukau
+{
+ compactdistmtxthread_arg_t *targ = (compactdistmtxthread_arg_t *)arg;
int njob = targ->njob;
- int *jobpospt = targ->jobpospt;
+ int thread_no = targ->thread_no;
+ int *selfscore = targ->selfscore;
+ double **partmtx = targ->partmtx;
+ int *nogaplen = targ->nogaplen;
int **pointt = targ->pointt;
- float **mtx = targ->mtx;
-
- short *table1;
+ int *jobpospt = targ->jobpospt;
+ double *mindist = targ->mindist;
+ int *mindistfrom = targ->mindistfrom;
int i, j;
+ double tmpdist, preference, tmpdistx, tmpdisty;
+ int *table1;
while( 1 )
{
- pthread_mutex_lock( targ->mutex );
- i = *jobpospt;
- if( i == njob )
+#ifdef enablemultithread
+ if( nthread )
{
+ pthread_mutex_lock( targ->mutex );
+ i = *jobpospt;
+ if( i == njob-1 )
+ {
+ pthread_mutex_unlock( targ->mutex );
+ commonsextet_p( NULL, NULL );
+ return( NULL );
+ }
+ *jobpospt = i+1;
pthread_mutex_unlock( targ->mutex );
- commonsextet_p( NULL, NULL );
- return( NULL );
}
- *jobpospt = i+1;
- pthread_mutex_unlock( targ->mutex );
+ else
+#endif
+ {
+ i = *jobpospt;
+ if( i == njob-1 )
+ {
+ commonsextet_p( NULL, NULL );
+ return( NULL );
+ }
+ *jobpospt = i+1;
+ }
- table1 = (short *)calloc( tsize, sizeof( short ) );
+ table1 = (int *)calloc( tsize, sizeof( int ) );
if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
- if( i % 10 == 0 )
+ if( i % 100 == 0 )
{
- fprintf( stderr, "\r% 5d / %d (thread %4d)", i+1, njob, thread_no );
+ if( nthread )
+ reporterr( "\r% 5d / %d (thread %4d)", i+1, njob, thread_no );
+ else
+ reporterr( "\r% 5d / %d", i+1, njob );
}
makecompositiontable_p( table1, pointt[i] );
- for( j=i; j<njob; j++ )
+ for( j=i+1; j<njob; j++ )
{
- mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
+
+ tmpdist = distcompact( nogaplen[i], nogaplen[j], table1, pointt[j], selfscore[i], selfscore[j] );
+ preference = preferenceval( i, j, njob );
+ tmpdistx = tmpdist + preference;
+ if( tmpdistx < mindist[i] )
+ {
+ mindist[i] = tmpdistx;
+ mindistfrom[i] = j;
+ }
+
+ preference = preferenceval( j, i, njob );
+ tmpdisty = tmpdist + preference;
+ if( tmpdisty < mindist[j] )
+ {
+ mindist[j] = tmpdisty;
+ mindistfrom[j] = i;
+ }
+
+ if( partmtx[i] ) partmtx[i][j] = tmpdist;
+ if( partmtx[j] ) partmtx[j][i] = tmpdist;
}
free( table1 );
}
}
-static void *treebasethread( void *arg )
+static void *msacompactdisthalfmtxthread( void *arg ) // Worker that computes each pairwise compact distance once (j > i) and tracks per-sequence minima; also used when enablemultithread == 0.
{
- treebasethread_arg_t *targ = (treebasethread_arg_t *)arg;
- int thread_no = targ->thread_no;
- int *nrunpt = targ->nrunpt;
+ msacompactdistmtxthread_arg_t *targ = (msacompactdistmtxthread_arg_t *)arg;
int njob = targ->njob;
- int *nlen = targ->nlen;
- int *jobpospt = targ->jobpospt;
- int ***topol = targ->topol;
- Treedep *dep = targ->dep;
- char **aseq = targ->aseq;
- double *effarr = targ->effarr;
- int *alloclen = targ->alloclenpt;
- int *fftlog = targ->fftlog;
-
- char **mseq1, **mseq2;
- char **localcopy;
- int i, j, l;
- int len1, len2;
- int clus1, clus2;
- float pscore, tscore;
- char *indication1, *indication2;
- double *effarr1 = NULL;
- double *effarr2 = NULL;
- float dumfl = 0.0;
- int ffttry;
- int m1, m2;
-#if 0
+ int thread_no = targ->thread_no;
+ int *selfscore = targ->selfscore;
+ double **partmtx = targ->partmtx; // rows may be NULL; only non-NULL rows are written below
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ double *mindist = targ->mindist; // smallest (distance + preference) recorded so far for each sequence
+ int *mindistfrom = targ->mindistfrom; // index of the partner that produced the corresponding mindist[] entry
+ int *jobpospt = targ->jobpospt; // shared counter holding the next row index to process
+ double tmpdist, preference, tmpdistx, tmpdisty;
int i, j;
-#endif
-
- mseq1 = AllocateCharMtx( njob, 0 );
- mseq2 = AllocateCharMtx( njob, 0 );
- localcopy = calloc( njob, sizeof( char * ) );
- effarr1 = AllocateDoubleVec( njob );
- effarr2 = AllocateDoubleVec( njob );
- indication1 = AllocateCharVec( 150 );
- indication2 = AllocateCharVec( 150 );
-
-
-#if 0
- fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
-#endif
-
-#if 0
- for( i=0; i<njob; i++ )
- fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
-#endif
-
-// for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
-
-
-// writePre( njob, name, nlen, aseq, 0 );
while( 1 )
{
- pthread_mutex_lock( targ->mutex );
- l = *jobpospt;
- if( l == njob-1 )
+#ifdef enablemultithread
+ if( nthread )
{
- pthread_mutex_unlock( targ->mutex );
+ pthread_mutex_lock( targ->mutex );
+ i = *jobpospt;
+ if( i == njob-1 ) // the last row has no partner j > i, so there is no work left
+ {
+ pthread_mutex_unlock( targ->mutex );
+ return( NULL );
+ }
+ *jobpospt = i+1; // claim row i and advance the shared counter while holding the mutex
+ pthread_mutex_unlock( targ->mutex );
+ }
+ else
+#endif
+ {
+ i = *jobpospt; // same claim logic as above, without locking
+ if( i == njob-1 )
+ {
+ return( NULL );
+ }
+ *jobpospt = i+1;
+ }
+
+ if( i % 100 == 0 ) // progress report every 100 rows
+ {
+ if( nthread )
+ fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
+ else
+ fprintf( stderr, "\r% 5d / %d", i, njob );
+ }
+
+ for( j=i+1; j<njob; j++ )
+ {
+ tmpdist = distcompact_msa( seq[i], seq[j], skiptable[i], skiptable[j], selfscore[i], selfscore[j] ); // slow, but ...
+
+ preference = preferenceval( i, j, njob ); // NOTE(review): preferenceval() is defined elsewhere; its value is added only for the minimum search, not stored in partmtx
+ tmpdistx = tmpdist + preference;
+ if( tmpdistx < mindist[i] )
+ {
+ mindist[i] = tmpdistx;
+ mindistfrom[i] = j;
+ }
+
+ preference = preferenceval( j, i, njob ); // same pair seen from j's side (arguments swapped)
+ tmpdisty = tmpdist + preference;
+ if( tmpdisty < mindist[j] )
+ {
+ mindist[j] = tmpdisty;
+ mindistfrom[j] = i;
+ }
+ if( partmtx[i] ) partmtx[i][j] = tmpdist; // store the raw distance symmetrically, but only in rows that exist
+ if( partmtx[j] ) partmtx[j][i] = tmpdist;
+ }
+ }
+}
+
+#if 1
+static void *msadistmtxthread( void *arg ) // Worker that fills one row of the half distance matrix iscore per iteration; also used when enablemultithread == 0.
+{
+ msadistmtxthread_arg_t *targ = (msadistmtxthread_arg_t *)arg;
+ int njob = targ->njob;
+ int thread_no = targ->thread_no;
+ int *selfscore = targ->selfscore;
+ double **iscore = targ->iscore;
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ Jobtable *jobpospt = targ->jobpospt; // shared job position; only its i field is used in this variant
+
+
+ double ssi, ssj, bunbo, iscoretmp;
+ int i, j;
+ int nlim = njob-1; // the last row has no partner j > i, so row indices stop here
+
+ while( 1 )
+ {
+#ifdef enablemultithread
+ if( nthread )
+ {
+ pthread_mutex_lock( targ->mutex );
+ i = jobpospt->i; // Do not use (jobpospt->i)++ here: the increment would happen before the termination check, so the loop would never stop.
+
+ if( i == nlim )
+ {
+ pthread_mutex_unlock( targ->mutex );
+ return( NULL );
+ }
+ jobpospt->i += 1; // claim row i only after the termination check has passed
+ pthread_mutex_unlock( targ->mutex );
+ if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
+ }
+ else
+#endif
+ {
+ i = (jobpospt->i)++; // unlocked path: take the current row and advance the counter
+ if( i == nlim ) return( NULL );
+ if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d", i, njob );
+ }
+
+ ssi = selfscore[i];
+ for( j=i+1; j<njob; j++ )
+ {
+ ssj = selfscore[j];
+ bunbo = MIN( ssi, ssj ); // bunbo = denominator: the smaller of the two self scores
+//fprintf( stderr, "bunbo = %f\n", bunbo );
+//fprintf( stderr, "naivepairscorefast() = %f\n", naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) );
+ if( bunbo == 0.0 ) // avoid dividing by zero; a fixed distance is used instead
+ iscoretmp = 2.0; // 2013/Oct/17
+ else
+ {
+ iscoretmp = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast
+ if( iscoretmp > 10 ) iscoretmp = 10.0; // 2015/Mar/17
+
+ }
+ if( iscoretmp < 0.0 ) // clamp negative distances to zero after warning
+ {
+ reporterr( "WARNING: negative distance, iscoretmp = %f\n", iscoretmp );
+ iscoretmp = 0.0;
+ }
+ iscore[i][j-i] = iscoretmp; // half-matrix layout: row i stores column j at offset j-i
+// printf( "i,j=%d,%d, iscoretmp=%f\n", i, j, iscoretmp );
+
+ }
+ }
+}
+#else
+static void *msadistmtxthread( void *arg ) // Alternative worker (in the #else branch of an "#if 1") that handles one (i,j) pair per iteration; also used when enablemultithread == 0.
+{
+ msadistmtxthread_arg_t *targ = (msadistmtxthread_arg_t *)arg;
+ int njob = targ->njob;
+ int thread_no = targ->thread_no;
+ int *selfscore = targ->selfscore;
+ double **iscore = targ->iscore;
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ Jobtable *jobpospt = targ->jobpospt; // shared job position; both the i and j fields are used in this variant
+
+
+ double ssi, ssj, bunbo, iscoretmp;
+ int i, j;
+
+ while( 1 )
+ {
+#ifdef enablemultithread
+ if( nthread ) pthread_mutex_lock( targ->mutex );
+#endif
+ j = jobpospt->j;
+ i = jobpospt->i;
+ j++; // advance to the next column of the current row
+ if( j == njob ) // row exhausted: move to the next row, starting just right of the diagonal
+ {
+ i++;
+ j = i + 1;
+ if( i == njob-1 ) // the last row has no partner j > i, so the work is finished
+ {
+#ifdef enablemultithread
+ if( nthread ) pthread_mutex_unlock( targ->mutex );
+#endif
+ return( NULL );
+ }
+ }
+ jobpospt->j = j; // publish the claimed pair before releasing the mutex
+ jobpospt->i = i;
+#ifdef enablemultithread
+ if( nthread ) pthread_mutex_unlock( targ->mutex );
+#endif
+
+
+ if( nthread )
+ {
+ if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); // report once per row (first pair), every 10th row
+ }
+ else
+ {
+ if( j==i+1 && i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", i, njob );
+ }
+ ssi = selfscore[i];
+ ssj = selfscore[j];
+ bunbo = MIN( ssi, ssj ); // bunbo = denominator: the smaller of the two self scores
+//fprintf( stderr, "bunbo = %f\n", bunbo );
+//fprintf( stderr, "naivepairscorefast() = %f\n", naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) );
+ if( bunbo == 0.0 ) // avoid dividing by zero; a fixed distance is used instead
+ iscoretmp = 2.0; // 2013/Oct/17
+ else
+ {
+ iscoretmp = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast
+ if( iscoretmp > 10 ) iscoretmp = 10.0; // 2015/Mar/17
+
+ }
+ iscore[i][j-i] = iscoretmp; // half-matrix layout: row i stores column j at offset j-i
+
+
+ }
+}
+#endif
+
+#ifdef enablemultithread
+static void *distancematrixthread( void *arg ) // Worker that fills row i of the half matrix mtx with commonsextet_p() values for every j >= i.
+{
+ distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
+ int thread_no = targ->thread_no;
+ int njob = targ->njob;
+ int *jobpospt = targ->jobpospt; // shared counter holding the next row index to process
+ int **pointt = targ->pointt;
+ double **mtx = targ->mtx;
+
+ int *table1;
+ int i, j;
+
+ while( 1 )
+ {
+ pthread_mutex_lock( targ->mutex );
+ i = *jobpospt;
+ if( i == njob ) // every row (including the last, which holds only the diagonal) has been claimed
+ {
+ pthread_mutex_unlock( targ->mutex );
+ commonsextet_p( NULL, NULL ); // NOTE(review): presumably releases commonsextet_p's internal buffers - confirm against its definition
+ return( NULL );
+ }
+ *jobpospt = i+1; // claim row i and advance the shared counter while holding the mutex
+ pthread_mutex_unlock( targ->mutex );
+
+ table1 = (int *)calloc( tsize, sizeof( int ) ); // fresh zeroed table for this row; freed at the end of the iteration
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ if( i % 100 == 0 ) // progress report every 100 rows
+ {
+ reporterr( "\r% 5d / %d (thread %4d)", i+1, njob, thread_no );
+ }
+ makecompositiontable_p( table1, pointt[i] );
+
+ for( j=i; j<njob; j++ ) // starts at j == i, so the diagonal entry is computed as well
+ {
+ mtx[i][j-i] = (double)commonsextet_p( table1, pointt[j] ); // half-matrix layout: row i stores column j at offset j-i
+ }
+ free( table1 );
+ }
+}
+
+
+static void *treebasethread( void *arg )
+{
+ treebasethread_arg_t *targ = (treebasethread_arg_t *)arg;
+ int thread_no = targ->thread_no;
+ int *nrunpt = targ->nrunpt;
+ int njob = targ->njob;
+ int *nlen = targ->nlen;
+ int *jobpospt = targ->jobpospt;
+ int ***topol = targ->topol;
+ Treedep *dep = targ->dep;
+ char **aseq = targ->aseq;
+ double *effarr = targ->effarr;
+ int *alloclen = targ->alloclenpt;
+ int *fftlog = targ->fftlog;
+ char *mergeoralign = targ->mergeoralign;
+ double **newdistmtx = targ->newdistmtx;
+ int *selfscore = targ->selfscore;
+
+ char **mseq1, **mseq2;
+ char **localcopy;
+ int i, m, j, l;
+ int immin, immax;
+ int len1, len2;
+ int clus1, clus2;
+ double pscore, tscore;
+ char *indication1, *indication2;
+ double *effarr1 = NULL;
+ double *effarr2 = NULL;
+// double dumfl = 0.0;
+ double dumdb = 0.0;
+ int ffttry;
+ int m1, m2;
+ double **dynamicmtx;
+ int ssi, ssm, bunbo;
+ int tm, ti;
+ int **localmem = NULL;
+ int posinmem;
+#if SKIP
+ int **skiptable1 = NULL, **skiptable2 = NULL;
+#endif
+#if 0
+ int i, j;
+#endif
+
+ mseq1 = AllocateCharMtx( njob, 0 );
+ mseq2 = AllocateCharMtx( njob, 0 );
+ localcopy = calloc( njob, sizeof( char * ) );
+ for( i=0; i<njob; i++ ) localcopy[i] = NULL;
+ effarr1 = AllocateDoubleVec( njob );
+ effarr2 = AllocateDoubleVec( njob );
+ indication1 = AllocateCharVec( 150 );
+ indication2 = AllocateCharVec( 150 );
+ dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
+ localmem = AllocateIntMtx( 2, njob+1 );
+
+
+#if 0
+ reporterr( "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
+#endif
+
+#if 0
+ for( i=0; i<njob; i++ )
+ reporterr( "TBFAST effarr[%d] = %f\n", i, effarr[i] );
+#endif
+
+// for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
+
+
+// writePre( njob, name, nlen, aseq, 0 );
+
+ while( 1 )
+ {
+ pthread_mutex_lock( targ->mutex );
+ l = *jobpospt;
+ if( l == njob-1 )
+ {
+ pthread_mutex_unlock( targ->mutex );
if( commonIP ) FreeIntMtx( commonIP );
commonIP = NULL;
- Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL );
+ A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+ D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru?
free( mseq1 );
free( mseq2 );
free( localcopy );
free( effarr2 );
free( indication1 );
free( indication2 );
+ FreeDoubleMtx( dynamicmtx );
+ FreeIntMtx( localmem );
return( NULL );
}
*jobpospt = l+1;
pthread_cond_wait( targ->treecond, targ->mutex );
(*nrunpt)++;
+
+ if( mergeoralign[l] == 'n' )
+ {
+// reporterr( "SKIP!\n" );
+ dep[l].done = 1;
+ (*nrunpt)--;
+ pthread_cond_broadcast( targ->treecond );
+// free( topol[l][0] ); topol[l][0] = NULL;
+// free( topol[l][1] ); topol[l][1] = NULL;
+// free( topol[l] ); topol[l] = NULL;
+ pthread_mutex_unlock( targ->mutex );
+ continue;
+ }
+
+
+
m1 = topol[l][0][0];
m2 = topol[l][1][0];
+// reporterr( "\ndistfromtip = %f\n", dep[l].distfromtip );
+// makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip - 0.5 );
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip );
+
len1 = strlen( aseq[m1] );
len2 = strlen( aseq[m2] );
if( *alloclen <= len1 + len2 )
{
- fprintf( stderr, "\nReallocating.." );
+ reporterr( "\nReallocating.." );
*alloclen = ( len1 + len2 ) + 1000;
ReallocateCharMtx( aseq, njob, *alloclen + 10 );
- fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
+ reporterr( "done. *alloclen = %d\n", *alloclen );
}
- for( i=0; (j=topol[l][0][i])!=-1; i++ )
+ localmem[0][0] = -1;
+ posinmem=0;
+ topolorder( njob, localmem[0], &posinmem, topol, dep, l, 0 );
+ localmem[1][0] = -1;
+ posinmem=0;
+ topolorder( njob, localmem[1], &posinmem, topol, dep, l, 1 );
+ for( i=0; (j=localmem[0][i])!=-1; i++ )
{
localcopy[j] = calloc( *alloclen, sizeof( char ) );
strcpy( localcopy[j], aseq[j] );
}
- for( i=0; (j=topol[l][1][i])!=-1; i++ )
+ for( i=0; (j=localmem[1][i])!=-1; i++ )
{
localcopy[j] = calloc( *alloclen, sizeof( char ) );
strcpy( localcopy[j], aseq[j] );
}
+
+ if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
+ {
+ reporterr( "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
+ alg = 'M';
+ }
+
+ if( alg == 'M' ) // hoka no thread ga M ni shitakamo shirenainode
+ {
+// reporterr( "Freeing commonIP (thread %d)\n", thread_no );
+ if( commonIP ) FreeIntMtx( commonIP );
+ commonIP = NULL;
+ commonAlloc1 = 0;
+ commonAlloc2 = 0;
+ }
+
pthread_mutex_unlock( targ->mutex );
#if 1 // CHUUI@@@@
- clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1 );
- clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2 );
+ clus1 = fastconjuction_noname( localmem[0], localcopy, mseq1, effarr1, effarr, indication1, 0.0 );
+ clus2 = fastconjuction_noname( localmem[1], localcopy, mseq2, effarr2, effarr, indication2, 0.0 );
#else
clus1 = fastconjuction_noweight( topol[l][0], localcopy, mseq1, effarr1, indication1 );
clus2 = fastconjuction_noweight( topol[l][1], localcopy, mseq2, effarr2, indication2 );
{
if( strlen( mseq1[i] ) != len1 )
{
- fprintf( stderr, "i = %d / %d\n", i, clus1 );
- fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
+ reporterr( "i = %d / %d\n", i, clus1 );
+ reporterr( "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
exit( 1 );
}
}
{
if( strlen( mseq2[j] ) != len2 )
{
- fprintf( stderr, "j = %d / %d\n", j, clus2 );
- fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
+ reporterr( "j = %d / %d\n", j, clus2 );
+ reporterr( "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
exit( 1 );
}
}
{
if( strlen( mseq1[i] ) != len1 )
{
- fprintf( stderr, "i = %d / %d\n", i, clus1 );
- fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" );
+ reporterr( "i = %d / %d\n", i, clus1 );
+ reporterr( "hairetsu ga kowareta (in treebase, after free topol) !\n" );
exit( 1 );
}
}
{
if( strlen( mseq2[j] ) != len2 )
{
- fprintf( stderr, "j = %d / %d\n", j, clus2 );
- fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" );
+ reporterr( "j = %d / %d\n", j, clus2 );
+ reporterr( "hairetsu ga kowareta (in treebase, after free topol) !\n" );
exit( 1 );
}
}
// fprintf( trap_g, "group1 = %s\n", indication1 );
// fprintf( trap_g, "group2 = %s\n", indication2 );
-// fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
- fprintf( stderr, "\rSTEP % 5d / %d (thread %4d)", l+1, njob-1, thread_no );
+// reporterr( "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
+ reporterr( "\rSTEP % 5d / %d (thread %4d)", l+1, njob-1, thread_no );
#if 0
- fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
- fprintf( stderr, "group1 = %.66s", indication1 );
- if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
- fprintf( stderr, "\n" );
- fprintf( stderr, "group2 = %.66s", indication2 );
- if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
- fprintf( stderr, "\n" );
+ reporterr( "STEP %d /%d\n", l+1, njob-1 );
+ reporterr( "group1 = %.66s", indication1 );
+ if( strlen( indication1 ) > 66 ) reporterr( "..." );
+ reporterr( "\n" );
+ reporterr( "group2 = %.66s", indication2 );
+ if( strlen( indication2 ) > 66 ) reporterr( "..." );
+ reporterr( "\n" );
#endif
/*
- fprintf( stderr, "before align all\n" );
+ reporterr( "before align all\n" );
display( aseq, njob );
- fprintf( stderr, "\n" );
- fprintf( stderr, "before align 1 %s \n", indication1 );
+ reporterr( "\n" );
+ reporterr( "before align 1 %s \n", indication1 );
display( mseq1, clus1 );
- fprintf( stderr, "\n" );
- fprintf( stderr, "before align 2 %s \n", indication2 );
+ reporterr( "\n" );
+ reporterr( "before align 2 %s \n", indication2 );
display( mseq2, clus2 );
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
*/
- if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
- {
- fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
- alg = 'M';
- if( commonIP ) FreeIntMtx( commonIP );
- commonAlloc1 = 0;
- commonAlloc2 = 0;
- }
// if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 );
if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
else ffttry = 0;
// ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
-// fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
+// reporterr( "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 );
if( force_fft || ( use_fft && ffttry ) )
{
- fprintf( stderr, "f" );
+ reporterr( "f" );
if( alg == 'M' )
{
- fprintf( stderr, "m" );
- pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
+ reporterr( "m" );
+ pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
}
else
{
- pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+ pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
}
}
else
{
- fprintf( stderr, "d" );
+ reporterr( "d" );
fftlog[m1] = 0;
switch( alg )
{
pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
break;
case( 'M' ):
- fprintf( stderr, "m" );
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ reporterr( "m" );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
break;
- case( 'Q' ):
- if( clus1 == 1 && clus2 == 1 && 0 )
+ case( 'd' ):
+ if( 1 && clus1 == 1 && clus2 == 1 )
{
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap );
}
else
{
- pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
}
break;
- case( 'R' ):
- pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- break;
- case( 'H' ):
- pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- break;
case( 'A' ):
if( clus1 == 1 && clus2 == 1 )
{
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap );
}
else
{
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1 );
}
break;
default:
}
}
#if SCOREOUT
- fprintf( stderr, "score = %10.2f\n", pscore );
+ reporterr( "score = %10.2f\n", pscore );
#endif
tscore += pscore;
nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
-
if( disp ) display( localcopy, njob );
+ if( newdistmtx ) // tsukawanai
+ {
+#if 0
+ reporterr( "group1 = " );
+ for( i=0; i<clus1; i++ ) reporterr( "%d ", topol[l][0][i] );
+ reporterr( "\n" );
+ reporterr( "group2 = " );
+ for( m=0; m<clus2; m++ ) reporterr( "%d ", topol[l][1][m] );
+ reporterr( "\n" );
+#endif
+#if SKIP
+ skiptable1 = AllocateIntMtx( clus1, 0 );
+ skiptable2 = AllocateIntMtx( clus2, 0 );
+ makeskiptable( clus1, skiptable1, mseq1 ); // allocate suru.
+ makeskiptable( clus2, skiptable2, mseq2 ); // allocate suru.
+#endif
+ for( i=0; i<clus1; i++ )
+ {
+ ti = localmem[0][i];
+ ssi = selfscore[localmem[0][i]];
+ for( m=0; m<clus2; m++ )
+ {
+ ssm = selfscore[localmem[1][m]];
+ tm = localmem[1][m];
+ if( ti<tm )
+ {
+ immin = ti;
+ immax = tm;
+ }
+ else
+ {
+ immin = tm;
+ immax = ti;
+ }
+ bunbo = MIN( ssi, ssm );
+ if( bunbo == 0 )
+ newdistmtx[immin][immax-immin] = 2.0; // 2013/Oct/17
+ else
+#if SKIP
+ newdistmtx[immin][immax-immin] = ( 1.0 - naivepairscorefast( mseq1[i], mseq2[m], skiptable1[i], skiptable2[m], penalty_dist ) / bunbo ) * 2.0;
+#else
+ newdistmtx[immin][immax-immin] = ( 1.0 - naivepairscore11( mseq1[i], mseq2[m], penalty_dist ) / bunbo ) * 2.0;
+#endif
+ }
+ }
+#if SKIP
+ FreeIntMtx( skiptable1 ); skiptable1 = NULL;
+ FreeIntMtx( skiptable2 ); skiptable2 = NULL;
+#endif
+ }
+
+
+
+
+
+
pthread_mutex_lock( targ->mutex );
dep[l].done = 1;
(*nrunpt)--;
pthread_cond_broadcast( targ->treecond );
- for( i=0; (j=topol[l][0][i])!=-1; i++ )
+ for( i=0; (j=localmem[0][i])!=-1; i++ )
strcpy( aseq[j], localcopy[j] );
- for( i=0; (j=topol[l][1][i])!=-1; i++ )
+ for( i=0; (j=localmem[1][i])!=-1; i++ )
strcpy( aseq[j], localcopy[j] );
- pthread_mutex_unlock( targ->mutex );
- for( i=0; (j=topol[l][0][i])!=-1; i++ )
- free( localcopy[j] );
- for( i=0; (j=topol[l][1][i])!=-1; i++ )
- free( localcopy[j] );
- free( topol[l][0] );
- free( topol[l][1] );
- free( topol[l] );
+// reporterr( "at step %d\n", l );
+// use_getrusage();
+ pthread_mutex_unlock( targ->mutex );
-// fprintf( stderr, "\n" );
- }
-#if SCOREOUT
- fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
-#endif
-}
-#endif
-static void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen )
-{
- int l, len1, len2, i, m;
- int len1nocommongap, len2nocommongap;
+
+ for( i=0; (j=localmem[0][i])!=-1; i++ )
+ {
+ if(localcopy[j] ) free( localcopy[j] );
+ localcopy[j] = NULL;
+ }
+ for( i=0; (j=localmem[1][i])!=-1; i++ )
+ {
+ if( localcopy[j] ) free( localcopy[j] );
+ localcopy[j] = NULL;
+ }
+
+
+// if( topol[l][0] ) free( topol[l][0] );
+// topol[l][0] = NULL;
+// if( topol[l][1] ) free( topol[l][1] );
+// topol[l][1] = NULL;
+// if( topol[l] ) free( topol[l] );
+// topol[l] = NULL;
+
+
+// reporterr( "\n" );
+ }
+#if SCOREOUT
+ reporterr( "totalscore = %10.2f\n\n", tscore );
+#endif
+}
+#endif
+
+static int treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, double *effarr, double **newdistmtx, int *selfscore, int *alloclen, int (*callback)(int, int, char*) )
+{
+ int l, len1, len2, i, m, immin, immax;
+ int len1nocommongap, len2nocommongap;
int clus1, clus2;
- float pscore, tscore;
- static char *indication1, *indication2;
- static double *effarr1 = NULL;
- static double *effarr2 = NULL;
- static int *fftlog; // fixed at 2006/07/26
- float dumfl = 0.0;
+ double pscore, tscore;
+ char *indication1 = NULL, *indication2 = NULL;
+ double *effarr1 = NULL;
+ double *effarr2 = NULL;
+ int *fftlog = NULL; // fixed at 2006/07/26
+// double dumfl = 0.0;
+ double dumdb = 0.0;
int ffttry;
int m1, m2;
- static int *gaplen;
- static int *gapmap;
- static int *alreadyaligned;
+ int *gaplen = NULL;
+ int *gapmap = NULL;
+ int *alreadyaligned = NULL;
+ double **dynamicmtx = NULL;
+ double ssi, ssm, bunbo;
+ int tm, ti;
+ int gapmaplen;
+ int **localmem = NULL;
+ int posinmem;
+#if SKIP
+ int **skiptable1 = NULL, **skiptable2 = NULL;
+#endif
#if 0
int i, j;
#endif
+// reporterr( "treebase newdistmtx=%p\n", newdistmtx );
+
if( effarr1 == NULL )
{
effarr1 = AllocateDoubleVec( njob );
gaplen = AllocateIntVec( *alloclen+10 );
gapmap = AllocateIntVec( *alloclen+10 );
alreadyaligned = AllocateIntVec( njob );
+ dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
+ localmem = AllocateIntMtx( 2, njob+1 );
}
for( i=0; i<njob-nadd; i++ ) alreadyaligned[i] = 1;
for( i=njob-nadd; i<njob; i++ ) alreadyaligned[i] = 0;
+ if( callback && callback( 0, 50, "Progressive alignment" ) ) goto chudan_tbfast;
+
for( l=0; l<njob; l++ ) fftlog[l] = 1;
+ localmem[0][0] = -1;
+ localmem[1][0] = -1;
+ clus1 = 1;// chain ni hitsuyou
#if 0
- fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
+ reporterr( "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
#endif
#if 0
for( i=0; i<njob; i++ )
- fprintf( stderr, "TBFAST effarr[%d] = %f\n", i, effarr[i] );
+ reporterr( "TBFAST effarr[%d] = %f\n", i, effarr[i] );
#endif
// for( i=0; i<njob; i++ ) strcpy( aseq[i], seq[i] );
tscore = 0.0;
for( l=0; l<njob-1; l++ )
{
+// reporterr( " at the beginning of the loop, clus1,clus2=%d,%d\n", clus1, clus2 );
+
+ if( l > 0 && dep[l].child0 == l-1 && dep[l].child1 == -1 && dep[dep[l].child0].child1 == -1 )
+ {
+ localmem[0][clus1] = topol[l-1][1][0];
+ localmem[0][clus1+1] = -1;
+
+ localmem[1][0] = topol[l][1][0];
+ localmem[1][1] = -1;
+ }
+ else
+ {
+ localmem[0][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[0], &posinmem, topol, dep, l, 0 );
+ localmem[1][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[1], &posinmem, topol, dep, l, 1 );
+ }
+
if( mergeoralign[l] == 'n' )
{
-// fprintf( stderr, "SKIP!\n" );
- free( topol[l][0] );
- free( topol[l][1] );
- free( topol[l] );
+// reporterr( "SKIP!\n" );
+// free( topol[l][0] ); topol[l][0] = NULL;
+// free( topol[l][1] ); topol[l][1] = NULL;
+// free( topol[l] ); topol[l] = NULL;
continue;
}
+// reporterr( "\ndistfromtip = %f\n", dep[l].distfromtip );
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip );
+// makedynamicmtx( dynamicmtx, n_dis_consweight_multi, ( dep[l].distfromtip - 0.2 ) * 3 );
+
+
m1 = topol[l][0][0];
m2 = topol[l][1][0];
len1 = strlen( aseq[m1] );
len2 = strlen( aseq[m2] );
if( *alloclen < len1 + len2 )
{
- fprintf( stderr, "\nReallocating.." );
+ reporterr( "\nReallocating.." );
*alloclen = ( len1 + len2 ) + 1000;
ReallocateCharMtx( aseq, njob, *alloclen + 10 );
gaplen = realloc( gaplen, ( *alloclen + 10 ) * sizeof( int ) );
if( gaplen == NULL )
{
- fprintf( stderr, "Cannot realloc gaplen\n" );
+ reporterr( "Cannot realloc gaplen\n" );
exit( 1 );
}
gapmap = realloc( gapmap, ( *alloclen + 10 ) * sizeof( int ) );
if( gapmap == NULL )
{
- fprintf( stderr, "Cannot realloc gapmap\n" );
+ reporterr( "Cannot realloc gapmap\n" );
exit( 1 );
}
- fprintf( stderr, "done. *alloclen = %d\n", *alloclen );
+ reporterr( "done. *alloclen = %d\n", *alloclen );
}
#if 1 // CHUUI@@@@
- clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1 );
- clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2 );
+ clus1 = fastconjuction_noname( localmem[0], aseq, mseq1, effarr1, effarr, indication1, 0.0 );
+ clus2 = fastconjuction_noname( localmem[1], aseq, mseq2, effarr2, effarr, indication2, 0.0 );
#else
- clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1, indication1 );
- clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2, indication2 );
+ clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, 0.0 );
+ clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, 0.0 );
+// clus1 = fastconjuction_noweight( topol[l][0], aseq, mseq1, effarr1, indication1 );
+// clus2 = fastconjuction_noweight( topol[l][1], aseq, mseq2, effarr2, indication2 );
#endif
+
+
+
+
+
+
+
+
+
+
+
if( mergeoralign[l] == '1' || mergeoralign[l] == '2' )
{
newgapstr = "=";
{
if( strlen( mseq1[i] ) != len1 )
{
- fprintf( stderr, "i = %d / %d\n", i, clus1 );
- fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
+ reporterr( "i = %d / %d\n", i, clus1 );
+ reporterr( "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
exit( 1 );
}
}
{
if( strlen( mseq2[j] ) != len2 )
{
- fprintf( stderr, "j = %d / %d\n", j, clus2 );
- fprintf( stderr, "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
+ reporterr( "j = %d / %d\n", j, clus2 );
+ reporterr( "hairetsu ga kowareta (in treebase, after conjuction) !\n" );
exit( 1 );
}
}
{
if( strlen( mseq1[i] ) != len1 )
{
- fprintf( stderr, "i = %d / %d\n", i, clus1 );
- fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" );
+ reporterr( "i = %d / %d\n", i, clus1 );
+ reporterr( "hairetsu ga kowareta (in treebase, after free topol) !\n" );
exit( 1 );
}
}
{
if( strlen( mseq2[j] ) != len2 )
{
- fprintf( stderr, "j = %d / %d\n", j, clus2 );
- fprintf( stderr, "hairetsu ga kowareta (in treebase, after free topol) !\n" );
+ reporterr( "j = %d / %d\n", j, clus2 );
+ reporterr( "hairetsu ga kowareta (in treebase, after free topol) !\n" );
exit( 1 );
}
}
// fprintf( trap_g, "group1 = %s\n", indication1 );
// fprintf( trap_g, "group2 = %s\n", indication2 );
-// fprintf( stderr, "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
- fprintf( stderr, "\rSTEP % 5d / %d ", l+1, njob-1 );
- fflush( stderr );
+// reporterr( "\rSTEP % 5d / %d %d-%d", l+1, njob-1, clus1, clus2 );
+ reporterr( "\rSTEP % 5d / %d ", l+1, njob-1 );
+ if( callback && callback( 0, 50+50*l/(njob-1), "Progressive alignment" ) ) goto chudan_tbfast;
#if 0
- fprintf( stderr, "STEP %d /%d\n", l+1, njob-1 );
- fprintf( stderr, "group1 = %.66s", indication1 );
- if( strlen( indication1 ) > 66 ) fprintf( stderr, "..." );
- fprintf( stderr, "\n" );
- fprintf( stderr, "group2 = %.66s", indication2 );
- if( strlen( indication2 ) > 66 ) fprintf( stderr, "..." );
- fprintf( stderr, "\n" );
+ reporterr( "STEP %d /%d\n", l+1, njob-1 );
+ reporterr( "group1 = %.66s", indication1 );
+ if( strlen( indication1 ) > 66 ) reporterr( "..." );
+ reporterr( "\n" );
+ reporterr( "group2 = %.66s", indication2 );
+ if( strlen( indication2 ) > 66 ) reporterr( "..." );
+ reporterr( "\n" );
#endif
/*
- fprintf( stderr, "before align all\n" );
+ reporterr( "before align all\n" );
display( aseq, njob );
- fprintf( stderr, "\n" );
- fprintf( stderr, "before align 1 %s \n", indication1 );
+ reporterr( "\n" );
+ reporterr( "before align 1 %s \n", indication1 );
display( mseq1, clus1 );
- fprintf( stderr, "\n" );
- fprintf( stderr, "before align 2 %s \n", indication2 );
+ reporterr( "\n" );
+ reporterr( "before align 2 %s \n", indication2 );
display( mseq2, clus2 );
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
*/
if( !nevermemsave && ( alg != 'M' && ( len1 > 30000 || len2 > 30000 ) ) )
{
- fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
+ reporterr( "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
alg = 'M';
if( commonIP ) FreeIntMtx( commonIP );
+ commonIP = NULL;
commonAlloc1 = 0;
commonAlloc2 = 0;
}
if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000);
else ffttry = 0;
// ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000); // v6.708
-// fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
+// reporterr( "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 );
if( force_fft || ( use_fft && ffttry ) )
{
- fprintf( stderr, "f" );
+ reporterr( "f" );
if( alg == 'M' )
{
- fprintf( stderr, "m" );
- pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
+ reporterr( "m" );
+ pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
}
else
{
- pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+ pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+// reporterr( "######### mseq1[0] = %s\n", mseq1[0] );
}
}
else
{
- fprintf( stderr, "d" );
+ reporterr( "d" );
fftlog[m1] = 0;
switch( alg )
{
pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen );
break;
case( 'M' ):
- fprintf( stderr, "m" );
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ reporterr( "m" );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
break;
- case( 'Q' ):
- if( clus1 == 1 && clus2 == 1 && 0 )
+ case( 'd' ):
+ if( 1 && clus1 == 1 && clus2 == 1 )
{
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap );
}
else
{
- pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = D__align_ls( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
}
break;
- case( 'R' ):
- pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- break;
- case( 'H' ):
- pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- break;
case( 'A' ):
if( clus1 == 1 && clus2 == 1 )
{
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+// reporterr( "%d-%d", clus1, clus2 );
+ pscore = G__align11( dynamicmtx, mseq1, mseq2, *alloclen, outgap, outgap );
}
else
{
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+// reporterr( "\n\n %d - %d (%d-%d) : ", topol[l][0][0], topol[l][1][0], clus1, clus2 );
+ pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, localmem[0][0], 1 );
}
break;
default:
}
}
#if SCOREOUT
- fprintf( stderr, "score = %10.2f\n", pscore );
+ reporterr( "score = %10.2f\n", pscore );
#endif
tscore += pscore;
nlen[m1] = 0.5 * ( nlen[m1] + nlen[m2] );
// writePre( njob, name, nlen, aseq, 0 );
if( disp ) display( aseq, njob );
-// fprintf( stderr, "\n" );
+// reporterr( "\n" );
if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara.
{
- adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] );
- restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
- findnewgaps( clus2, mseq2, gaplen );
- insertnewgaps( njob, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg );
- for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
- for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1;
+ reporterr( "Check source!!!\n" );
+ exit( 1 );
}
if( mergeoralign[l] == '2' )
{
- adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] );
-// fprintf( stderr, ">STEP1 mseq1[0] = \n%s\n", mseq1[0] );
-// fprintf( stderr, ">STEP1 mseq2[0] = \n%s\n", mseq2[0] );
- restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
-// fprintf( stderr, "STEP2 mseq1[0] = %s\n", mseq1[0] );
-// fprintf( stderr, "STEP2 mseq2[0] = %s\n", mseq2[0] );
- findnewgaps( clus1, mseq1, gaplen );
- insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg );
-// fprintf( stderr, "STEP3 mseq1[0] = %s\n", mseq1[0] );
-// fprintf( stderr, "STEP3 mseq2[0] = %s\n", mseq2[0] );
+// if( localkeeplength ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, NULL );
+// for( i=0; i<clus1; i++ ) reporterr( ">STEP0 mseq1[%d] = \n%s\n", i, mseq1[i] );
+// for( i=0; i<clus2; i++ ) reporterr( ">STEP0 mseq2[%d] = \n%s\n", i, mseq2[i] );
+ gapmaplen = strlen( mseq1[0] )-len1nocommongap+len1;
+ adjustgapmap( gapmaplen, gapmap, mseq1[0] );
+#if 0
+ reporterr( "\n" );
+ for( i=0; i<clus1; i++ ) reporterr( ">STEP1 mseq1[%d] = \n%s\n", i, mseq1[i] );
+ for( i=0; i<clus2; i++ ) reporterr( ">STEP1 mseq2[%d] = \n%s\n", i, mseq2[i] );
+#endif
+// if( clus1 + clus2 < njob ) restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
+ if( smoothing )
+ {
+ restorecommongapssmoothly( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' );
+ findnewgaps( clus1, 0, mseq1, gaplen );
+ insertnewgaps_bothorders( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, gapmaplen, *alloclen, alg, '-' );
+ }
+ else
+ {
+ restorecommongaps( njob, njob-(clus1+clus2), aseq, localmem[0], localmem[1], gapmap, *alloclen, '-' );
+ findnewgaps( clus1, 0, mseq1, gaplen );
+ insertnewgaps( njob, alreadyaligned, aseq, localmem[0], localmem[1], gaplen, gapmap, *alloclen, alg, '-' );
+ }
+
+#if 0
+ reporterr( "\n" );
+ for( i=0; i<clus1; i++ ) reporterr( ">STEP3 mseq1[%d] = \n%s\n", i, mseq1[i] );
+ for( i=0; i<clus2; i++ ) reporterr( ">STEP3 mseq2[%d] = \n%s\n", i, mseq2[i] );
+#endif
+
+#if 0
for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
- for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1;
+ for( i=0; i<clus1; i++ )
+ {
+ reporterr( "mseq1[%d] bef change = %s\n", i, mseq1[i] );
+ eq2dash( mseq1[i] );
+ reporterr( "mseq1[%d] aft change = %s\n", i, mseq1[i] );
+ }
+ for( i=0; i<clus2; i++ )
+ {
+ reporterr( "mseq2[%d] bef change = %s\n", i, mseq2[i] );
+ eq2dash( mseq2[i] );
+ reporterr( "mseq2[%d] aft change = %s\n", i, mseq2[i] );
+ }
+ for( i=0; i<clus1; i++ ) eq2dash( mseq1[i] );
+ for( i=0; i<clus2; i++ ) eq2dash( mseq2[i] );
+#endif
+
+
+ eq2dashmatometehayaku( mseq1, clus1 );
+ eq2dashmatometehayaku( mseq2, clus2 );
+
+ for( i=0; (m=localmem[1][i])>-1; i++ ) alreadyaligned[m] = 1;
+ }
+
+ if( newdistmtx ) // tsukawanai
+ {
+#if 0
+ reporterr( "group1 = " );
+ for( i=0; i<clus1; i++ ) reporterr( "%d ", topol[l][0][i] );
+ reporterr( "\n" );
+ reporterr( "group2 = " );
+ for( m=0; m<clus2; m++ ) reporterr( "%d ", topol[l][1][m] );
+ reporterr( "\n" );
+#endif
+#if SKIP
+ skiptable1 = AllocateIntMtx( clus1, 0 );
+ skiptable2 = AllocateIntMtx( clus2, 0 );
+ makeskiptable( clus1, skiptable1, mseq1 ); // allocate suru.
+ makeskiptable( clus2, skiptable2, mseq2 ); // allocate suru.
+#endif
+ for( i=0; i<clus1; i++ )
+ {
+#if SKIP
+// makeskiptable( 1, skiptable1, mseq1+i ); // allocate suru.
+#endif
+ ti = localmem[0][i];
+ ssi = selfscore[localmem[0][i]];
+ for( m=0; m<clus2; m++ )
+ {
+ ssm = selfscore[localmem[1][m]];
+ tm = localmem[1][m];
+ if( ti<tm )
+ {
+ immin = ti;
+ immax = tm;
+ }
+ else
+ {
+ immin = tm;
+ immax = ti;
+ }
+ bunbo = MIN( ssi, ssm );
+ if( bunbo == 0.0 )
+ newdistmtx[immin][immax-immin] = 2.0; // 2013/Oct/17
+ else
+#if SKIP
+ newdistmtx[immin][immax-immin] = ( 1.0 - naivepairscorefast( mseq1[i], mseq2[m], skiptable1[i], skiptable2[m], penalty_dist ) / bunbo ) * 2.0;
+#else
+ newdistmtx[immin][immax-immin] = ( 1.0 - naivepairscore11( mseq1[i], mseq2[m], penalty_dist ) / bunbo ) * 2.0;
+#endif
+ }
+ }
+#if SKIP
+ FreeIntMtx( skiptable1 ); skiptable1 = NULL;
+ FreeIntMtx( skiptable2 ); skiptable2 = NULL;
+#endif
}
- free( topol[l][0] );
- free( topol[l][1] );
- free( topol[l] );
+// free( topol[l][0] ); topol[l][0] = NULL;
+// free( topol[l][1] ); topol[l][1] = NULL;
+// free( topol[l] ); topol[l] = NULL;
+
+
+// reporterr( ">514\n%s\n", aseq[514] );
}
#if SCOREOUT
- fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
+ reporterr( "totalscore = %10.2f\n\n", tscore );
+#endif
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL );
+ A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+ D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru?
+ free( effarr1 );
+ free( effarr2 );
+ free( indication1 );
+ free( indication2 );
+ free( fftlog );
+ free( gaplen );
+ free( gapmap );
+ FreeDoubleMtx( dynamicmtx );
+ free( alreadyaligned );
+ FreeIntMtx( localmem );
+ effarr1 = NULL;
+ return( 0 );
+
+ chudan_tbfast:
+
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL );
+ A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+ D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru?
+ if( effarr1 ) free( effarr1 ); effarr1 = NULL;
+ if( effarr2 ) free( effarr2 ); effarr2 = NULL;
+ if( indication1 ) free( indication1 ); indication1 = NULL;
+ if( indication2 ) free( indication2 ); indication2 = NULL;
+ if( fftlog ) free( fftlog ); fftlog = NULL;
+ if( gaplen ) free( gaplen ); gaplen = NULL;
+ if( gapmap ) free( gapmap ); gapmap = NULL;
+ if( alreadyaligned ) free( alreadyaligned ); alreadyaligned = NULL;
+ if( dynamicmtx ) FreeDoubleMtx( dynamicmtx ); dynamicmtx = NULL;
+ if( localmem ) FreeIntMtx( localmem ); localmem = NULL;
+#if SKIP
+ if( skiptable1 ) FreeIntMtx( skiptable1 ); skiptable1 = NULL;
+ if( skiptable2 ) FreeIntMtx( skiptable2 ); skiptable2 = NULL;
#endif
+
+ return( 1 );
}
static void WriteOptions( FILE *fp )
else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
}
- fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
+ reporterr( "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
if( use_fft ) fprintf( fp, "FFT on\n" );
fprintf( fp, "tree-base method\n" );
fprintf( fp, "FFT off\n" );
fflush( fp );
}
-
-int main( int argc, char *argv[] )
+/*
+ * preparepartmtx: allocate the row-pointer table for a partially
+ * in-memory distance matrix.
+ *
+ * nseq    number of sequences (rows; each allocated row has nseq columns).
+ *
+ * Returns a calloc'ed array of nseq row pointers.
+ *
+ * When the global compacttree == 1, rows are allocated (zero-filled) one
+ * by one while the cumulative size, counted as sizeof(double)*nseq per
+ * row, stays within the global maxdistmtxsize.  As soon as the budget
+ * would be exceeded an advisory message is printed and the remaining
+ * rows are left NULL.
+ *
+ * For any other compacttree value no row is allocated and every entry
+ * is NULL.
+ *
+ * The process is terminated via ErrorExit() if the pointer table itself
+ * cannot be allocated.
+ */
+static double **preparepartmtx( int nseq )
+{
+	int i;
+	double **val;
+	double size;
+
+	val = (double **)calloc( nseq, sizeof( double *) );
+	/* val[i] is written unconditionally below, so a failed allocation must not pass silently. */
+	if( !val ) ErrorExit( "Cannot allocate partmtx\n" );
+	size = 0;
+
+	if( compacttree == 1 )
+	{
+		for( i=0; i<nseq; i++ )
+		{
+			size += (double)sizeof( double ) * nseq;
+			if( size > maxdistmtxsize )
+			{
+				reporterr( "\n\nThe size of full distance matrix is estimated to exceed %.2fGB.\n", maxdistmtxsize / 1000 / 1000 /1000 );
+				reporterr( "Will try the calculation using a %d x %d matrix.\n", nseq, i );
+				/* This format string has no conversions; the stray arguments it used to receive were dropped. */
+				reporterr( "This calculation will be slow due to the limited RAM space.\n" );
+				reporterr( "To avoid the slowdown, please try '--initialramusage xGB' (x>>%.2f),\n", maxdistmtxsize / 1000 / 1000 /1000 );
+				reporterr( "if larger RAM space is available.\n" );
+				reporterr( "Note that xGB is NOT the upper limit of RAM usage.\n" );
+				reporterr( "Two to three times larger space may be used for building a guide tree.\n" );
+				reporterr( "Memory usage of the MSA stage depends on similarity of input sequences.\n\n" );
+//				reporterr( "If the RAM is small, try '--initialramusage xGB' with a smaller x value.\n" );
+				reporterr( "The '--memsavetree' option uses smaller RAM space.\n" );
+				reporterr( "If tree-like relationship can be ignored, try '--pileup' or '--randomchain'.\n\n" );
+				reporterr( "The result of --initialramusage xGB is almost identical to the default, except for rounding differences.\n" );
+
+				reporterr( "In the cases of --memsavetree, --pileup and --randomchain, the result differs from the default.\n\n" );
+				break;
+			}
+			/* NOTE(review): a failed row allocation is stored as NULL without any report -- confirm that callers tolerate a NULL row before the cut-off index. */
+			val[i] = (double *)calloc( nseq, sizeof( double ) );
+		}
+		if( i == nseq ) reporterr( "The full matrix will be used.\n" );
+
+		for( ;i<nseq; i++ ) val[i] = NULL; // just in case: rows beyond the budget are explicitly NULL
+	}
+	else
+	{
+		for( i=0; i<nseq; i++ ) val[i] = NULL; // just in case: no row is allocated in this mode
+	}
+	return( val );
+}
+
+int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, char **argv, int (*callback)(int, int, char*))
{
- static int *nlen;
- static int *nogaplen;
- static char **name, **seq;
- static char **mseq1, **mseq2;
- static char **bseq;
- static double *eff;
+ int *nlen = NULL;
+ int *nogaplen = NULL;
+ char **name = NULL, **seq = NULL;
+ char **mseq1 = NULL, **mseq2 = NULL;
+ char **bseq = NULL;
+ double *eff = NULL;
int i, j;
- static int ***topol;
- static int *addmem;
- static Treedep *dep;
- static float **len;
- FILE *infp;
+ int ***topol = NULL;
+ int *addmem = NULL;
+ Treedep *dep = NULL;
+ double **len = NULL;
+ FILE *infp = NULL;
// FILE *adfp;
char c;
int alloclen;
- float longer, shorter;
- float lenfac;
- float bunbo;
-
- FILE *orderfp, *hat2p;
- int *grpseq;
- char *tmpseq;
- int **pointt;
- float **mtx = NULL; // by D. Mathog
- static short *table1;
+ double longer, shorter;
+ double lenfac;
+ double bunbo;
+
+ FILE *orderfp = NULL, *hat2p = NULL;
+ int *grpseq = NULL;
+ char *tmpseq = NULL;
+ int **pointt = NULL;
+ double **mtx = NULL; // by D. Mathog
+ int *table1 = NULL;
char b[B];
- int ien;
+ int ien, nlim;
+ int includememberres0, includememberres1;
double unweightedspscore;
int alignmentlength;
- char *mergeoralign;
+ char *mergeoralign = NULL;
int foundthebranch;
+ int nsubalignments = 0, maxmem;
+ int **subtable = NULL;
+ int *insubtable = NULL;
+ int *preservegaps = NULL;
+ char ***subalnpt = NULL;
+ int val;
+ char **tmpargv = NULL;
+ int iguidetree;
+ int *selfscore = NULL;
+ int calcpairdists;
+ int **skiptable = NULL;
+ char algbackup;
+ char *originalgaps = NULL;
+ char **addbk = NULL;
+ int **deletelist = NULL;
+ FILE *dlf = NULL;
+ int randomseed;
+ int **localmem = NULL;
+ int posinmem;
+// for compacttree
+ int *mindistfrom = NULL;
+ double *mindist = NULL;
+ double **partmtx = NULL;
+// for compacttree
+
+
+ if( ngui )
+ {
+ initglobalvariables();
+ njob = ngui;
+ nlenmax = 0;
+ for( i=0; i<njob; i++ )
+ {
+ ien = strlen( seqgui[i] );
+ if( ien > nlenmax ) nlenmax = ien;
+ }
+ infp = NULL;
+// stderr = fopen( "/dev/null", "a" ); // Windows????
+ tmpargv = AllocateCharMtx( argc, 0 );
+ for( i=0; i<argc; i++ ) tmpargv[i] = argv[i];
+ gmsg = 1;
+ }
+ else
+ gmsg = 0; // iranai
arguments( argc, argv );
+ algbackup = alg; // tbfast wo disttbfast ni ketsugou shitatame.
#ifndef enablemultithread
nthread = 0;
#endif
- if( inputfile )
+
+ if( ngui )
{
- infp = fopen( inputfile, "r" );
- if( !infp )
+ for( i=0; i<argc; i++ )
{
- fprintf( stderr, "Cannot open %s\n", inputfile );
- exit( 1 );
+// free( tmpargv[i] );
+ argv[i] = tmpargv[i];
}
+ free( tmpargv );
}
else
- infp = stdin;
-
- getnumlen( infp );
- rewind( infp );
-
- if( njob > 20000 )
{
- fprintf( stderr, "The number of sequences must be < %d\n", 20000 );
- fprintf( stderr, "Please try the --parttree option for such large data.\n" );
+ if( inputfile )
+ {
+ infp = fopen( inputfile, "r" );
+ if( !infp )
+ {
+ reporterr( "Cannot open %s\n", inputfile );
+ exit( 1 );
+ }
+ }
+ else
+ infp = stdin;
+
+ getnumlen( infp );
+ rewind( infp );
+ }
+
+ if( njob > 1000000 )
+ {
+ reporterr( "The number of sequences must be < %d\n", 1000000 );
+ reporterr( "Please try the --parttree option for such large data.\n" );
exit( 1 );
}
if( njob < 2 )
{
- fprintf( stderr, "At least 2 sequences should be input!\n"
+ reporterr( "At least 2 sequences should be input!\n"
"Only %d sequence found.\n", njob );
exit( 1 );
}
+ if( specificityconsideration != 0.0 && nlenmax)
+ {
+ if( nlenmax > 100000 )
+ {
+ reporterr( "\n" );
+ reporterr( "Too long to apply --allowshift or --unalignlevel>0\n" );
+ reporterr( "Please use the normal mode.\n" );
+ reporterr( "Please also note that MAFFT does not assume genomic rearrangements.\n" );
+ reporterr( "\n" );
+ exit( 1 );
+ }
+ }
+
+#ifndef mingw
+ setstacksize( 200 * njob ); // topolorder() de ookime no stack wo shiyou.
+#endif
+
+ if( subalignment )
+ {
+ readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem );
+ reporterr( "nsubalignments = %d\n", nsubalignments );
+ reporterr( "maxmem = %d\n", maxmem );
+ subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
+ insubtable = AllocateIntVec( njob );
+ preservegaps = AllocateIntVec( njob );
+ for( i=0; i<njob; i++ ) insubtable[i] = 0;
+ for( i=0; i<njob; i++ ) preservegaps[i] = 0;
+ subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 );
+ readsubalignmentstable( njob, subtable, preservegaps, NULL, NULL );
+ }
+
+
seq = AllocateCharMtx( njob, nlenmax*1+1 );
mseq1 = AllocateCharMtx( njob, 0 );
mseq2 = AllocateCharMtx( njob, 0 );
- topol = AllocateIntCub( njob, 2, 0 );
- len = AllocateFloatMtx( njob, 2 );
eff = AllocateDoubleVec( njob );
mergeoralign = AllocateCharVec( njob );
if( nadd ) addmem = AllocateIntVec( nadd+1 );
+ localmem = AllocateIntMtx( 2, njob+1 );
+
#if 0
Read( name, nlen, seq );
readData( infp, name, nlen, seq );
name = AllocateCharMtx( njob, B+1 );
nlen = AllocateIntVec( njob );
nogaplen = AllocateIntVec( njob );
- readData_pointer( infp, name, nlen, seq );
- fclose( infp );
+ if( ngui )
+ {
+ if( copydatafromgui( namegui, seqgui, name, nlen, seq ) )
+ exit( 1 );
+ }
+ else
+ {
+ readData_pointer( infp, name, nlen, seq );
+ fclose( infp );
+ }
#endif
+
constants( njob, seq );
#if 0
- fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
+ reporterr( "params = %d, %d, %d\n", penalty, penalty_ex, offset );
#endif
initSignalSM();
c = seqcheck( seq );
if( c )
{
- fprintf( stderr, "Illegal character %c\n", c );
+ reporterr( "Illegal character %c\n", c );
+ exit( 1 );
+ }
+
+ reporterr( "\n" );
+
+// reporterr( "tuplesize = %d, dorp = %c\n", tuplesize, dorp );
+ if( dorp == 'p' && tuplesize != 6 )
+ {
+ reporterr( "tuplesize must be 6 for aa sequence\n" );
+ exit( 1 );
+ }
+ if( dorp == 'd' && tuplesize != 6 && tuplesize != 10 )
+ {
+ reporterr( "tuplesize must be 6 or 10 for dna sequence\n" );
exit( 1 );
}
- fprintf( stderr, "\n" );
+ if( treein )
+ {
+ int npickx;
+ treein = check_guidetreefile( &randomseed, &npickx, &maxdistmtxsize );
+ if( treein == 't' )
+ {
+ varpairscore( njob, npickx, nlenmax, seq, randomseed );
+ exit( 1 );
+ }
+ else if( treein == 'c' )
+ {
+ compacttree = 1;
+ treein = 0;
+ use_fft = 0; // kankeinai?
+// maxdistmtxsize = 5 * 1000 * 1000; // 5GB. ato de kahen ni suru.
+// maxdistmtxsize = 1.0 * 1000 * 1000 * 1000; // 5GB. ato de kahen ni suru.
+ }
+ else if( treein == 'C' )
+ {
+ compacttree = 2;
+ treein = 0;
+ use_fft = 0; // kankeinai?
+ }
+ else if( treein == 'a' )
+ {
+// reporterr( "Compute pairwise scores\n" );
+ if( njob > 200000 )
+ {
+ reporterr( "Chain?\n" );
+ treein = 's';
+ nguidetree = 1;
+ }
+ else if( njob < 100 || 't' == varpairscore( njob, npickx, nlenmax, seq, randomseed ) )
+ {
+ if( treein == 'c' ) exit( 1 );
+ reporterr( "Tree!\n" );
+ treein = 0;
+ nguidetree = 2;
+ }
+ else
+ {
+ reporterr( "Chain!\n" );
+ treein = 's';
+ nguidetree = 1;
+ }
+ }
+ else if ( treein != 0 ) // auto no toki arieru
+ nguidetree = 1;
+ }
+
+ if( compacttree == 1 )
+ {
+ if( maxdistmtxsize > (double)njob * (njob-1) * sizeof( double ) / 2 )
+ {
+ reporterr( "Use conventional tree.\n" );
+ compacttree = 0;
+ }
+ }
if( !treein )
{
- fprintf( stderr, "\n\nMaking a distance matrix ..\n" );
- fflush( stderr );
+ reporterr( "\n\nMaking a distance matrix ..\n" );
+ if( callback && callback( 0, 0, "Distance matrix" ) ) goto chudan;
- tmpseq = AllocateCharVec( nlenmax+1 );
+ tmpseq = AllocateCharVec( nlenmax+1 );
grpseq = AllocateIntVec( nlenmax+1 );
pointt = AllocateIntMtx( njob, nlenmax+1 );
- mtx = AllocateFloatHalfMtx( njob );
- if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
+ if( !compacttree ) mtx = AllocateFloatHalfMtx( njob );
+ if( dorp == 'd' ) tsize = (int)pow( 4, tuplesize );
else tsize = (int)pow( 6, 6 );
- if( dorp == 'd' )
+ if( dorp == 'd' && tuplesize == 6 )
+ {
+ lenfaca = D6LENFACA;
+ lenfacb = D6LENFACB;
+ lenfacc = D6LENFACC;
+ lenfacd = D6LENFACD;
+ }
+ else if( dorp == 'd' && tuplesize == 10 )
{
- lenfaca = DLENFACA;
- lenfacb = DLENFACB;
- lenfacc = DLENFACC;
- lenfacd = DLENFACD;
+ lenfaca = D10LENFACA;
+ lenfacb = D10LENFACB;
+ lenfacc = D10LENFACC;
+ lenfacd = D10LENFACD;
}
else
{
nogaplen[i] = strlen( tmpseq );
if( nogaplen[i] < 6 )
{
-// fprintf( stderr, "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
-// fprintf( stderr, "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
+// reporterr( "Seq %d, too short, %d characters\n", i+1, nogaplen[i] );
+// reporterr( "Please use mafft-ginsi, mafft-linsi or mafft-ginsi\n\n\n" );
// exit( 1 );
}
if( nogaplen[i] > maxl ) maxl = nogaplen[i];
if( dorp == 'd' ) /* nuc */
{
seq_grp_nuc( grpseq, tmpseq );
- makepointtable_nuc( pointt[i], grpseq );
+// makepointtable_nuc( pointt[i], grpseq );
+// makepointtable_nuc_octet( pointt[i], grpseq );
+ if( tuplesize == 10 )
+ makepointtable_nuc_dectet( pointt[i], grpseq );
+ else if( tuplesize == 6 )
+ makepointtable_nuc( pointt[i], grpseq );
+ else
+ {
+ reporterr( "tuplesize=%d: not supported\n", tuplesize );
+ exit( 1 );
+ }
}
else /* amino */
{
makepointtable( pointt[i], grpseq );
}
}
-#ifdef enablemultithread
- if( nthread > 0 )
+ if( nunknown ) reporterr( "\nThere are %d ambiguous characters.\n", nunknown );
+
+
+ if( compacttree )
{
- distancematrixthread_arg_t *targ;
- int jobpos;
- pthread_t *handle;
- pthread_mutex_t mutex;
- jobpos = 0;
- targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) );
- handle = calloc( nthread, sizeof( pthread_t ) );
- pthread_mutex_init( &mutex, NULL );
+ reporterr( "Compact tree, step 1\n" );
+ mindistfrom = (int *)calloc( njob, sizeof( int ) );
+ mindist = (double *)calloc( njob, sizeof( double ) );
+ selfscore = (int *)calloc( njob, sizeof( int ) );
+ partmtx = preparepartmtx( njob );
- for( i=0; i<nthread; i++ )
- {
- targ[i].thread_no = i;
- targ[i].njob = njob;
- targ[i].jobpospt = &jobpos;
- targ[i].pointt = pointt;
- targ[i].mtx = mtx;
- targ[i].mutex = &mutex;
- pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
- }
-
- for( i=0; i<nthread; i++ )
- {
- pthread_join( handle[i], NULL );
- }
- pthread_mutex_destroy( &mutex );
- free( handle );
- free( targ );
- }
- else
-#endif
- {
for( i=0; i<njob; i++ )
{
- table1 = (short *)calloc( tsize, sizeof( short ) );
+ table1 = (int *)calloc( tsize, sizeof( int ) );
if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
- if( i % 10 == 0 )
- {
- fprintf( stderr, "\r% 5d / %d", i+1, njob );
- fflush( stderr );
- }
makecompositiontable_p( table1, pointt[i] );
-
- for( j=i; j<njob; j++ )
- {
- mtx[i][j-i] = (float)commonsextet_p( table1, pointt[j] );
- }
+ selfscore[i] = commonsextet_p( table1, pointt[i] );
free( table1 );
+ table1 = NULL;
}
- }
- fprintf( stderr, "\ndone.\n\n" );
- fflush( stderr );
- ien = njob-1;
+ commonsextet_p( NULL, NULL );
- for( i=0; i<ien; i++ )
- {
- for( j=i+1; j<njob; j++ )
+#ifdef enablemultithread
+ if( nthread > 0 )
{
- if( nogaplen[i] > nogaplen[j] )
+ compactdistmtxthread_arg_t *targ;
+ int jobpos;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+ double **mindistthread;
+ int **mindistfromthread;
+ jobpos = 0;
+ targ = calloc( nthread, sizeof( compactdistmtxthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ mindistthread = AllocateDoubleMtx( nthread, njob );
+ mindistfromthread = AllocateIntMtx( nthread, njob );
+ pthread_mutex_init( &mutex, NULL );
+
+
+ for( j=0; j<nthread; j++ )
{
- longer=(float)nogaplen[i];
- shorter=(float)nogaplen[j];
+ for( i=0; i<njob; i++ )
+ {
+ mindistthread[j][i] = 999.9;
+ mindistfromthread[j][i] = -1;
+ }
+ targ[j].thread_no = j;
+ targ[j].nogaplen = nogaplen;
+ targ[j].pointt = pointt;
+ targ[j].selfscore = selfscore;
+ targ[j].partmtx = partmtx;
+ targ[j].njob = njob;
+ targ[j].mindist = mindistthread[j];
+ targ[j].mindistfrom = mindistfromthread[j];
+ targ[j].jobpospt = &jobpos;
+ targ[j].mutex = &mutex;
+
+ pthread_create( handle+j, NULL, compactdisthalfmtxthread, (void *)(targ+j) );
}
- else
+
+ for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
+
+ for( i=0; i<njob; i++ )
{
- longer=(float)nogaplen[j];
- shorter=(float)nogaplen[i];
+ mindist[i] = 999.9;
+ mindistfrom[i] = -1;
+ for( j=0; j<nthread; j++ )
+ {
+ if( mindistthread[j][i] < mindist[i] )
+ {
+ mindist[i] = mindistthread[j][i];
+ mindistfrom[i] = mindistfromthread[j][i];
+ }
+ }
}
-// lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );
- lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
-// lenfac = 1.0;
-// fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
- bunbo = MIN( mtx[i][0], mtx[j][0] );
- if( bunbo == 0.0 )
- mtx[i][j-i] = 1.0;
- else
- mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac;
-// fprintf( stdout, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
+ for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
+
+
+ pthread_mutex_destroy( &mutex );
+ FreeDoubleMtx( mindistthread );
+ FreeIntMtx( mindistfromthread );
+ free( handle );
+ free( targ );
+
}
- }
- if( disopt )
- {
- for( i=0; i<njob; i++ )
+ else
+#endif
{
- sprintf( b, "=lgth = %04d", nogaplen[i] );
- strins( b, name[i] );
+ compactdistmtxthread_arg_t *targ;
+ int jobpos;
+
+ jobpos = 0;
+ targ = calloc( 1, sizeof( compactdistmtxthread_arg_t ) );
+
+ {
+ for( i=0; i<njob; i++ )
+ {
+ mindist[i] = 999.9;
+ mindistfrom[i] = -1;
+ }
+ targ[0].thread_no = 0;
+ targ[0].nogaplen = nogaplen;
+ targ[0].pointt = pointt;
+ targ[0].selfscore = selfscore;
+ targ[0].partmtx = partmtx;
+ targ[0].njob = njob;
+ targ[0].mindist = mindist;
+ targ[0].mindistfrom = mindistfrom;
+ targ[0].jobpospt = &jobpos;
+
+ compactdisthalfmtxthread( targ );
+ }
+
+ free( targ );
+
+ for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
}
- }
- free( grpseq );
- free( tmpseq );
- FreeIntMtx( pointt );
-#if 1 // writehat2 wo kakinaosu
- if( distout )
- {
- hat2p = fopen( "hat2", "w" );
- WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
- fclose( hat2p );
- }
-#endif
+// for( i=0; i<njob; i++ ) printf( "mindist[%d] = %f, mindistfrom[%d] = %d\n", i, mindist[i], i, mindistfrom[i] );
+ reporterr( "\ndone.\n" );
- }
- else {
-#if 0 // readhat2 wo kakinaosu
- fprintf( stderr, "Loading 'hat2' ... " );
- prep = fopen( "hat2", "r" );
- if( prep == NULL ) ErrorExit( "Make hat2." );
- readhat2_float( prep, njob, name, mtx ); // name chuui
- fclose( prep );
- fprintf( stderr, "done.\n" );
+#if 0
+ reporterr( "\npartmtx = .\n" );
+ for( i=0; i<njob; i++ )
+ {
+ reporterr( "i=%d\n", i );
+ if( partmtx[i] ) for( j=0; j<njob; j++ ) reporterr( "%f ", partmtx[i][j]);
+ else reporterr( "nil" );
+ reporterr( "\n", i );
+ }
#endif
- }
-
- if( treein )
- {
- fprintf( stderr, "Loading a tree ... " );
- loadtree( njob, topol, len, name, nogaplen, dep );
- }
- else if( topin )
- {
- fprintf( stderr, "Loading a topology ... " );
- loadtop( njob, mtx, topol, len );
- FreeFloatHalfMtx( mtx, njob );
- }
- else if( treeout )
- {
- fprintf( stderr, "Constructing a UPGMA tree ... " );
-
- fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen, dep );
-// veryfastsupg_float_realloc_nobk_halfmtx_treeout( njob, mtx, topol, len, name, nogaplen );
+ }
+ else
+ {
+#ifdef enablemultithread
+ if( nthread > 0 )
+ {
+ distancematrixthread_arg_t *targ;
+ int jobpos;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+
+ jobpos = 0;
+ targ = calloc( nthread, sizeof( distancematrixthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+
+ for( i=0; i<nthread; i++ )
+ {
+ targ[i].thread_no = i;
+ targ[i].njob = njob;
+ targ[i].jobpospt = &jobpos;
+ targ[i].pointt = pointt;
+ targ[i].mtx = mtx;
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, distancematrixthread, (void *)(targ+i) );
+ }
+
+ for( i=0; i<nthread; i++ )
+ {
+ pthread_join( handle[i], NULL );
+ }
+ pthread_mutex_destroy( &mutex );
+ free( handle );
+ free( targ );
+ }
+ else
+#endif
+ {
+ for( i=0; i<njob; i++ )
+ {
+ table1 = (int *)calloc( tsize, sizeof( int ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ if( i % 100 == 0 )
+ {
+ reporterr( "\r% 5d / %d", i+1, njob );
+ if( callback && callback( 0, i*25/njob, "Distance matrix" ) ) goto chudan;
+ }
+ makecompositiontable_p( table1, pointt[i] );
+
+ for( j=i; j<njob; j++ )
+ {
+ mtx[i][j-i] = (double)commonsextet_p( table1, pointt[j] );
+ }
+ free( table1 ); table1 = NULL;
+ }
+ }
+ reporterr( "\ndone.\n\n" );
+ ien = njob-1;
+
+ for( i=0; i<ien; i++ )
+ {
+ for( j=i+1; j<njob; j++ )
+ {
+ if( nogaplen[i] > nogaplen[j] )
+ {
+ longer=(double)nogaplen[i];
+ shorter=(double)nogaplen[j];
+ }
+ else
+ {
+ longer=(double)nogaplen[j];
+ shorter=(double)nogaplen[i];
+ }
+// if( tuplesize == 6 )
+ lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
+// else
+// lenfac = 1.0;
+// reporterr( "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
+ bunbo = MIN( mtx[i][0], mtx[j][0] );
+ if( bunbo == 0.0 )
+ mtx[i][j-i] = 2.0; // 2013/Oct/17 -> 2bai
+ else
+ mtx[i][j-i] = ( 1.0 - mtx[i][j-i] / bunbo ) * lenfac * 2.0; // 2013/Oct/17 -> 2bai
+// reporterr( "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
+ }
+ }
+ if( disopt )
+ {
+ for( i=0; i<njob; i++ )
+ {
+ sprintf( b, "=lgth = %04d", nogaplen[i] );
+ strins( b, name[i] );
+ }
+ }
+ FreeIntMtx( pointt ); pointt = NULL;
+ commonsextet_p( NULL, NULL );
+ }
+ free( grpseq ); grpseq = NULL;
+ free( tmpseq ); tmpseq = NULL;
+#if 0 // writehat2 wo kakinaosu -> iguidetree loop nai ni idou
+ if( distout )
+ {
+ hat2p = fopen( "hat2", "w" );
+ WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
+ fclose( hat2p );
+ }
+#endif
- FreeFloatHalfMtx( mtx, njob );
}
- else
+#if 0
+ else
{
- fprintf( stderr, "Constructing a UPGMA tree ... " );
- fixed_musclesupg_float_realloc_nobk_halfmtx( njob, mtx, topol, len, dep );
- FreeFloatHalfMtx( mtx, njob );
+ reporterr( "Loading 'hat2' ... " );
+ prep = fopen( "hat2", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat2." );
+ readhat2_double( prep, njob, name, mtx ); // name chuui
+ fclose( prep );
+ reporterr( "done.\n" );
}
-// else
-// ErrorExit( "Incorrect tree\n" );
- fprintf( stderr, "\ndone.\n\n" );
- fflush( stderr );
+#endif
- orderfp = fopen( "order", "w" );
- if( !orderfp )
- {
- fprintf( stderr, "Cannot open 'order'\n" );
- exit( 1 );
- }
- for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
- {
- fprintf( orderfp, "%d\n", j );
- }
- for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
- {
- fprintf( orderfp, "%d\n", j );
- }
- fclose( orderfp );
+// reporterr( "after computing distance matrix," );
+// use_getrusage();
- if( ( treeout || distout ) && noalign )
+ if( nadd && keeplength )
{
- writeData_pointer( stdout, njob, name, nlen, seq );
- fprintf( stderr, "\n" );
- SHOWVERSION;
- return( 0 );
- }
-
+ originalgaps = (char *)calloc( nlenmax+1, sizeof( char) );
+ recordoriginalgaps( originalgaps, njob-nadd, seq );
- if( tbrweight )
- {
- weight = 3;
-#if 0
- utree = 0; counteff( njob, topol, len, eff ); utree = 1;
-#else
- counteff_simple_float( njob, topol, len, eff );
-#endif
+ if( mapout )
+ {
+ addbk = (char **)calloc( nadd+1, sizeof( char * ) );
+ for( i=0; i<nadd; i++ )
+ {
+ ien = strlen( seq[njob-nadd+i] );
+ addbk[i] = (char *)calloc( ien + 1, sizeof( char ) );
+ gappick0( addbk[i], seq[njob-nadd+i] );
+ }
+ addbk[nadd] = NULL;
+ }
+ else
+ addbk = NULL;
}
else
{
- for( i=0; i<njob; i++ ) eff[i] = 1.0;
+ originalgaps = NULL;
+ addbk = NULL;
}
+
-#if 0
- for( i=0; i<njob; i++ )
- fprintf( stdout, "eff[%d] = %20.16f\n", i, eff[i] );
- exit( 1 );
-#endif
+ for( iguidetree=0; iguidetree<nguidetree; iguidetree++ )
+// for( iguidetree=0; ; iguidetree++ )
+ {
- FreeFloatMtx( len );
+ alg = algbackup; // tbfast wo disttbfast ni ketsugou shitatame.
- bseq = AllocateCharMtx( njob, nlenmax*2+1 );
- alloclen = nlenmax*2+1;
- if( nadd )
- {
- alignmentlength = strlen( seq[0] );
- for( i=0; i<njob-nadd; i++ )
+ topol = AllocateIntCub( njob, 2, 0 );
+ len = AllocateFloatMtx( njob, 2 );
+
+ if( iguidetree == nguidetree - 1 ) calcpairdists = 0;
+ else calcpairdists = 1;
+
+ if( treein )
{
- if( alignmentlength != strlen( seq[i] ) )
+ nguidetree = 1; // iranai
+ calcpairdists = 0; // iranai
+ if( treein == (int)'l' )
+ {
+ loadtree( njob, topol, len, name, nogaplen, dep, treeout );
+ }
+ else if( treein == (int)'s' )
{
- fprintf( stderr, "#################################################################################\n" );
- fprintf( stderr, "# ERROR! #\n" );
- fprintf( stderr, "# The original%4d sequences must be aligned #\n", njob-nadd );
- fprintf( stderr, "#################################################################################\n" );
+ createchain( njob, topol, len, name, nogaplen, dep, treeout, 1, randomseed );
+ nthread = 0;
+ weight = 0; // mafft.tmpl kara idou
+ tbrweight = 0; // mafft.tmpl kara idou
+ }
+ else if( treein == (int)'p' )
+ {
+ createchain( njob, topol, len, name, nogaplen, dep, treeout, 0, randomseed );
+ nthread = 0;
+ weight = 0; // mafft.tmpl kara idou
+ tbrweight = 0; // mafft.tmpl kara idou
+ }
+ else
+ {
+ reporterr( "Error. treein = %d or %c\n", treein, treein );
exit( 1 );
}
}
- if( addprofile )
+ else if( topin )
+ {
+ reporterr( "Loading a topology ... " );
+ reporterr( "--topin has been disabled\n" );
+ exit( 1 );
+// loadtop( njob, mtx, topol, len );
+// FreeFloatHalfMtx( mtx, njob );
+ }
+ else
{
- alignmentlength = strlen( seq[njob-nadd] );
- for( i=njob-nadd; i<njob; i++ )
+ if( distout )
{
- if( alignmentlength != strlen( seq[i] ) )
+ hat2p = fopen( "hat2", "w" );
+ WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, mtx );
+ // writehat2 wo kakinaosu
+ fclose( hat2p );
+
+ if( !treeout && noalign ) // 2016Jul31
{
- fprintf( stderr, "###############################################################################\n" );
- fprintf( stderr, "# ERROR! #\n" );
- fprintf( stderr, "# The%4d additional sequences must be aligned #\n", nadd );
- fprintf( stderr, "# Otherwise, try the '--add' option, instead of '--addprofile' option. #\n" );
- fprintf( stderr, "###############################################################################\n" );
- exit( 1 );
+ writeData_pointer( stdout, njob, name, nlen, seq );
+ reporterr( "\n" );
+ SHOWVERSION;
+ goto chudan;
+// return( 0 );
}
}
- for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
- addmem[nadd] = -1;
- foundthebranch = 0;
- for( i=0; i<njob-1; i++ )
+
+ if( subalignment ) // merge error no tame
{
- if( samemember( topol[i][0], addmem ) ) // jissainiha nai
- {
- mergeoralign[i] = '1';
- foundthebranch = 1;
- }
- else if( samemember( topol[i][1], addmem ) )
+ reporterr( "Constructing a UPGMA tree ... " );
+ fixed_supg_double_realloc_nobk_halfmtx_treeout_constrained( njob, mtx, topol, len, name, nlen, dep, nsubalignments, subtable, !calcpairdists );
+ if( !calcpairdists )
{
- mergeoralign[i] = '2';
- foundthebranch = 1;
+ FreeFloatHalfMtx( mtx, njob ); mtx = NULL;
}
- else
+ }
+ else if( compacttree ) // merge error no tame
+ {
+ reporterr( "Constructing a tree ... " );
+ compacttree_memsaveselectable( njob, partmtx, mindistfrom, mindist, pointt, selfscore, bseq, skiptable, topol, len, name, nogaplen, dep, treeout, compacttree, 1 );
+ if( mindistfrom ) free( mindistfrom ); mindistfrom = NULL;
+ if( mindist ) free( mindist );; mindist = NULL;
+ if( selfscore ) free( selfscore ); selfscore = NULL;
+ if( bseq ) FreeCharMtx( bseq ); bseq = NULL; // nikaime dake
+ if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL; // nikaime dake
+ if( pointt ) FreeIntMtx( pointt ); pointt = NULL; // ikkaime dake.
+ free( partmtx );
+ }
+ else if( treeout ) // merge error no tame
+ {
+ reporterr( "Constructing a UPGMA tree (treeout, efffree=%d) ... ", !calcpairdists );
+ fixed_musclesupg_double_realloc_nobk_halfmtx_treeout_memsave( njob, mtx, topol, len, name, nogaplen, dep, !calcpairdists );
+ if( !calcpairdists )
{
- mergeoralign[i] = 'n';
+ FreeFloatHalfMtx( mtx, njob ); mtx = NULL;
}
}
- if( !foundthebranch )
+ else
{
- fprintf( stderr, "###############################################################################\n" );
- fprintf( stderr, "# ERROR! #\n" );
- fprintf( stderr, "# There is no appropriate position to add the%4d sequences in the guide tree.#\n", nadd );
- fprintf( stderr, "# Check whether the%4d sequences form a monophyletic cluster. #\n", nadd );
- fprintf( stderr, "# If not, try the '--add' option, instead of the '--addprofile' option. #\n" );
- fprintf( stderr, "############################################################################### \n" );
- exit( 1 );
+ reporterr( "Constructing a UPGMA tree (efffree=%d) ... ", !calcpairdists );
+ fixed_musclesupg_double_realloc_nobk_halfmtx_memsave( njob, mtx, topol, len, dep, 1, !calcpairdists );
+ if( !calcpairdists )
+ {
+ FreeFloatHalfMtx( mtx, njob ); mtx = NULL;
+ }
}
- commongappick( nadd, seq+njob-nadd );
- for( i=njob-nadd; i<njob; i++ ) strcpy( bseq[i], seq[i] );
+ }
+// else
+// ErrorExit( "Unknown tree method\n" );
+
+
+
+
+ if( calcpairdists ) selfscore = AllocateIntVec( njob );
+
+
+ if( callback && callback( 0, 25, "Guide tree" ) ) goto chudan;
+ reporterr( "\ndone.\n\n" );
+ if( callback && callback( 0, 50, "Guide tree" ) ) goto chudan;
+
+ if( sparsepickup && iguidetree == nguidetree-1 )
+ {
+ reporterr( "Sparsepickup! \n" );
+ pickup( njob, nogaplen, topol, name, seq );
+ reporterr( "done. \n" );
+ SHOWVERSION;
+ goto chudan;
+ }
+// reporterr( "after tree building" );
+// use_getrusage();
+
+
+ if( treein == 's' || treein == 'p' )
+ {
+ localmem[0][0] = topol[0][0][0];
+ for( i=1; i<njob; i++ )
+ localmem[0][i] = topol[i-1][1][0];
}
else
{
- for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'n';
- for( j=njob-nadd; j<njob; j++ )
+ localmem[0][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[0], &posinmem, topol, dep, njob-2, 2 );
+ }
+
+ orderfp = fopen( "order", "w" );
+ if( !orderfp )
+ {
+ reporterr( "Cannot open 'order'\n" );
+ exit( 1 );
+ }
+#if 0
+ for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
+ {
+ fprintf( orderfp, "%d\n", j );
+ }
+ for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
+ {
+ fprintf( orderfp, "%d\n", j );
+ }
+#else
+ for( i=0; i<njob; i++ )
+ fprintf( orderfp, "%d\n", localmem[0][i] );
+#endif
+ fclose( orderfp );
+
+
+
+
+ if( ( treeout || distout ) && noalign )
+ {
+ writeData_pointer( stdout, njob, name, nlen, seq );
+ reporterr( "\n" );
+ SHOWVERSION;
+ goto chudan;
+// return( 0 );
+ }
+
+ if( tbrweight )
+ {
+ weight = 3;
+#if 0
+ utree = 0; counteff( njob, topol, len, eff ); utree = 1;
+#else
+ counteff_simple_double_nostatic_memsave( njob, topol, len, dep, eff );
+// counteff_simple_double_nostatic( njob, topol, len, eff );
+#endif
+ }
+ else
+ {
+ for( i=0; i<njob; i++ ) eff[i] = 1.0;
+ }
+
+#if 0
+ for( i=0; i<njob; i++ )
+ reporterr( "eff[%d] = %20.16f\n", i, eff[i] );
+ exit( 1 );
+#endif
+
+
+ FreeFloatMtx( len ); len = NULL;
+
+ bseq = AllocateCharMtx( njob, nlenmax*2+1 );
+ alloclen = nlenmax*2+1;
+
+
+
+ if( nadd )
+ {
+ alignmentlength = strlen( seq[0] );
+ for( i=0; i<njob-nadd; i++ )
{
- addmem[0] = j;
- addmem[1] = -1;
+ if( alignmentlength != strlen( seq[i] ) )
+ {
+ reporterr( "#################################################################################\n" );
+ reporterr( "# ERROR!\n" );
+ reporterr( "# The original %d sequences must be aligned\n", njob-nadd );
+ reporterr( "# alignmentlength = %d, but strlen(seq[%d])=%d\n", alignmentlength, i, (int)strlen( seq[i] ) );
+ reporterr( "#################################################################################\n" );
+ goto chudan; // TEST!!
+ //exit( 1 );
+ }
+ }
+ if( addprofile )
+ {
+ alignmentlength = strlen( seq[njob-nadd] );
+ for( i=njob-nadd; i<njob; i++ )
+ {
+ if( alignmentlength != strlen( seq[i] ) )
+ {
+ reporterr( "###############################################################################\n" );
+ reporterr( "# ERROR!\n" );
+ reporterr( "# The %d additional sequences must be aligned\n", nadd );
+ reporterr( "# Otherwise, try the '--add' option, instead of '--addprofile' option.\n" );
+ reporterr( "###############################################################################\n" );
+ exit( 1 );
+ }
+ }
+ for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
+ addmem[nadd] = -1;
+ foundthebranch = 0;
for( i=0; i<njob-1; i++ )
{
- if( samemember( topol[i][0], addmem ) ) // arieru
+ localmem[0][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+ localmem[1][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+ if( samemember( localmem[0], addmem ) ) // jissainiha nai
{
-// fprintf( stderr, "HIT!\n" );
- if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
- else mergeoralign[i] = '1';
+ mergeoralign[i] = '1';
+ foundthebranch = 1;
}
- else if( samemember( topol[i][1], addmem ) )
+ else if( samemember( localmem[1], addmem ) ) // samemembern ni henkou kanou
+ {
+ mergeoralign[i] = '2';
+ foundthebranch = 1;
+ }
+ else
{
-// fprintf( stderr, "HIT!\n" );
- if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
- else mergeoralign[i] = '2';
+ mergeoralign[i] = 'n';
}
}
+ if( !foundthebranch )
+ {
+ reporterr( "###############################################################################\n" );
+ reporterr( "# ERROR!\n" );
+ reporterr( "# There is no appropriate position to add the %d sequences in the guide tree.\n", nadd );
+ reporterr( "# Check whether the %d sequences form a monophyletic cluster.\n", nadd );
+ reporterr( "# If not, try the '--add' option, instead of the '--addprofile' option.\n" );
+ reporterr( "############################################################################### \n" );
+ exit( 1 );
+ }
+ commongappick( nadd, seq+njob-nadd );
+ for( i=njob-nadd; i<njob; i++ ) strcpy( bseq[i], seq[i] );
+ }
+ else
+ {
+ for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'n';
+#if 0
+ for( j=njob-nadd; j<njob; j++ )
+ {
+ addmem[0] = j;
+ addmem[1] = -1;
+ for( i=0; i<njob-1; i++ )
+ {
+ reporterr( "Looking for samemember, %d-%d/%d\n", j, i, njob );
+ localmem[0][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+ localmem[1][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+ if( samemembern( localmem[0], addmem, 1 ) ) // arieru
+ {
+// reporterr( "HIT!\n" );
+ if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+ else mergeoralign[i] = '1';
+ }
+ else if( samemembern( localmem[1], addmem, 1 ) )
+ {
+// reporterr( "HIT!\n" );
+ if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+ else mergeoralign[i] = '2';
+ }
+ }
+ }
+#else
+ for( i=0; i<njob-1; i++ )
+ {
+// reporterr( "Looking for samemember, %d-%d/%d\n", j, i, njob );
+ localmem[0][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+ localmem[1][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+ for( j=njob-nadd; j<njob; j++ )
+ {
+ addmem[0] = j;
+ addmem[1] = -1;
+
+ if( samemembern( localmem[0], addmem, 1 ) ) // arieru
+ {
+// reporterr( "HIT!\n" );
+ if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+ else mergeoralign[i] = '1';
+ }
+ else if( samemembern( localmem[1], addmem, 1 ) )
+ {
+// reporterr( "HIT!\n" );
+ if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
+ else mergeoralign[i] = '2';
+ }
+ }
+ }
+#endif
+
+ for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
+ addmem[nadd] = -1;
+ nlim = njob-1;
+// for( i=0; i<njob-1; i++ )
+ for( i=0; i<nlim; i++ )
+ {
+ localmem[0][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+ localmem[1][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+ includememberres0 = includemember( localmem[0], addmem );
+ includememberres1 = includemember( localmem[1], addmem );
+// if( includemember( topol[i][0], addmem ) && includemember( topol[i][1], addmem ) )
+ if( includememberres0 && includememberres1 )
+ {
+ mergeoralign[i] = 'w';
+ }
+ else if( includememberres0 )
+ {
+ mergeoralign[i] = '1';
+ }
+ else if( includememberres1 )
+ {
+ mergeoralign[i] = '2';
+ }
+ }
+#if 0
+ for( i=0; i<njob-1; i++ )
+ {
+ reporterr( "mem0 = " );
+ for( j=0; topol[i][0][j]>-1; j++ ) reporterr( "%d ", topol[i][0][j] );
+ reporterr( "\n" );
+ reporterr( "mem1 = " );
+ for( j=0; topol[i][1][j]>-1; j++ ) reporterr( "%d ", topol[i][1][j] );
+ reporterr( "\n" );
+ reporterr( "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
+ }
+#endif
+ for( i=njob-nadd; i<njob; i++ ) gappick0( bseq[i], seq[i] );
}
- for( i=0; i<nadd; i++ ) addmem[i] = njob-nadd+i;
- addmem[nadd] = -1;
- for( i=0; i<njob-1; i++ )
+// if( !keeplength ) commongappick( njob-nadd, seq );
+ commongappick( njob-nadd, seq );
+
+ for( i=0; i<njob-nadd; i++ ) strcpy( bseq[i], seq[i] );
+
+ }
+//--------------- kokokara ----
+ else if( subalignment )
+ {
+ for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
+ for( i=0; i<nsubalignments; i++ )
{
- if( includemember( topol[i][0], addmem ) && includemember( topol[i][1], addmem ) )
+ reporterr( "Checking subalignment %d:\n", i+1 );
+ alignmentlength = strlen( seq[subtable[i][0]] );
+// for( j=0; subtable[i][j]!=-1; j++ )
+// reporterr( " %d. %-30.30s\n", subtable[i][j]+1, name[subtable[i][j]]+1 );
+ for( j=0; subtable[i][j]!=-1; j++ )
{
- mergeoralign[i] = 'w';
+ if( subtable[i][j] >= njob ) // check sumi
+ {
+ reporterr( "No such sequence, %d.\n", subtable[i][j]+1 );
+ exit( 1 );
+ }
+ if( alignmentlength != strlen( seq[subtable[i][j]] ) )
+ {
+ reporterr( "\n" );
+ reporterr( "###############################################################################\n" );
+ reporterr( "# ERROR!\n" );
+ reporterr( "# Subalignment %d must be aligned.\n", i+1 );
+ reporterr( "# Please check the alignment lengths of following sequences.\n" );
+ reporterr( "#\n" );
+ reporterr( "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength );
+ reporterr( "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) );
+ reporterr( "#\n" );
+ reporterr( "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" );
+ if( subalignmentoffset )
+ {
+ reporterr( "#\n" );
+ reporterr( "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
+ reporterr( "# In this case, the rule of numbering is:\n" );
+ reporterr( "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
+ reporterr( "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
+ }
+ reporterr( "###############################################################################\n" );
+ reporterr( "\n" );
+ goto chudan; // TEST!!
+ //exit( 1 );
+ }
+ insubtable[subtable[i][j]] = 1;
}
- else if( includemember( topol[i][0], addmem ) )
+ for( j=0; j<njob-1; j++ )
{
- mergeoralign[i] = '1';
+ if( includemember( topol[j][0], subtable[i] ) && includemember( topol[j][1], subtable[i] ) )
+ {
+ mergeoralign[j] = 'n';
+ }
}
- else if( includemember( topol[i][1], addmem ) )
+ foundthebranch = 0;
+ for( j=0; j<njob-1; j++ )
{
- mergeoralign[i] = '2';
+ if( samemember( topol[j][0], subtable[i] ) || samemember( topol[j][1], subtable[i] ) )
+ {
+ foundthebranch = 1;
+ reporterr( " -> OK\n" );
+ break;
+ }
}
+ if( !foundthebranch )
+ {
+ system( "cp infile.tree GuideTree" ); // tekitou
+ reporterr( "\n" );
+ reporterr( "###############################################################################\n" );
+ reporterr( "# ERROR!\n" );
+ reporterr( "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 );
+ reporterr( "# in the guide tree ('GuideTree' in this directory) internally computed.\n" );
+ reporterr( "# If you really want to use this subalignment, pelase give a tree with --treein \n" );
+ reporterr( "# http://mafft.cbrc.jp/alignment/software/treein.html\n" );
+ reporterr( "# http://mafft.cbrc.jp/alignment/software/merge.html\n" );
+ if( subalignmentoffset )
+ {
+ reporterr( "#\n" );
+ reporterr( "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
+ reporterr( "# In this case, the rule of numbering is:\n" );
+ reporterr( "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
+ reporterr( "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
+ }
+ reporterr( "############################################################################### \n" );
+ reporterr( "\n" );
+ goto chudan; // TEST!!
+ //exit( 1 );
+ }
+// commongappick( seq[subtable[i]], subalignment[i] ); // irukamo
}
#if 0
for( i=0; i<njob-1; i++ )
{
- fprintf( stderr, "mem0 = " );
- for( j=0; topol[i][0][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][0][j] );
- fprintf( stderr, "\n" );
- fprintf( stderr, "mem1 = " );
- for( j=0; topol[i][1][j]>-1; j++ ) fprintf( stderr, "%d ", topol[i][1][j] );
- fprintf( stderr, "\n" );
- fprintf( stderr, "i=%d, mergeoralign[] = %c\n", i, mergeoralign[i] );
+ reporterr( "STEP %d\n", i+1 );
+ reporterr( "group1 = " );
+ for( j=0; topol[i][0][j] != -1; j++ )
+ reporterr( "%d ", topol[i][0][j]+1 );
+ reporterr( "\n" );
+ reporterr( "group2 = " );
+ for( j=0; topol[i][1][j] != -1; j++ )
+ reporterr( "%d ", topol[i][1][j]+1 );
+ reporterr( "\n" );
+ reporterr( "%d -> %c\n\n", i, mergeoralign[i] );
}
#endif
- for( i=njob-nadd; i<njob; i++ ) gappick0( bseq[i], seq[i] );
+
+ for( i=0; i<njob; i++ )
+ {
+ if( insubtable[i] ) strcpy( bseq[i], seq[i] );
+ else gappick0( bseq[i], seq[i] );
+ }
+
+ for( i=0; i<nsubalignments; i++ )
+ {
+ for( j=0; subtable[i][j]!=-1; j++ ) subalnpt[i][j] = bseq[subtable[i][j]];
+ if( !preservegaps[i] ) commongappick( j, subalnpt[i] );
+ }
+
+#if 0 // --> iguidetree loop no soto he
+ FreeIntMtx( subtable );
+ free( insubtable );
+ for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );
+ free( subalnpt );
+ free( preservegaps );
+#endif
+ }
+//--------------- kokomade ----
+ else
+ {
+ for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
+ for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
}
- commongappick( njob-nadd, seq );
- for( i=0; i<njob-nadd; i++ ) strcpy( bseq[i], seq[i] );
- }
- else
- {
- for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
- for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
- }
+ if( calcpairdists ) for( i=0; i<njob; i++ ) selfscore[i] = naivepairscore11( seq[i], seq[i], penalty_dist ); // (int)?
+
+ reporterr( "Progressive alignment %d/%d... \n", iguidetree+1, nguidetree );
+
+// reporterr( "\nbefore treebase" );
+// use_getrusage();
+
+#ifdef enablemultithread
+ if( nthread > 0 && nadd == 0 )
+ {
+ treebasethread_arg_t *targ;
+ int jobpos;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+ pthread_cond_t treecond;
+ int *fftlog;
+ int nrun;
+ int nthread_yoyu;
+
+ nthread_yoyu = nthread * 1;
+ nrun = 0;
+ jobpos = 0;
+ targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) );
+ fftlog = AllocateIntVec( njob );
+ handle = calloc( nthread_yoyu, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+ pthread_cond_init( &treecond, NULL );
+
+ for( i=0; i<njob; i++ ) dep[i].done = 0;
+ for( i=0; i<njob; i++ ) fftlog[i] = 1;
+
+ for( i=0; i<nthread_yoyu; i++ )
+ {
+ targ[i].thread_no = i;
+ targ[i].njob = njob;
+ targ[i].nrunpt = &nrun;
+ targ[i].nlen = nlen;
+ targ[i].jobpospt = &jobpos;
+ targ[i].topol = topol;
+ targ[i].dep = dep;
+ targ[i].aseq = bseq;
+ targ[i].effarr = eff;
+ targ[i].alloclenpt = &alloclen;
+ targ[i].fftlog = fftlog;
+ targ[i].mergeoralign = mergeoralign;
+#if 1 // tsuneni SEPARATELYCALCPAIRDISTS
+ targ[i].newdistmtx = NULL;
+ targ[i].selfscore = NULL;
+#else
+ if( calcpairdists ) // except for last cycle
+ {
+ targ[i].newdistmtx = mtx;
+ targ[i].selfscore = selfscore;
+ }
+ else
+ {
+ targ[i].newdistmtx = NULL;
+ targ[i].selfscore = NULL;
+ }
+#endif
+ targ[i].mutex = &mutex;
+ targ[i].treecond = &treecond;
+
+ pthread_create( handle+i, NULL, treebasethread, (void *)(targ+i) );
+ }
+
+ for( i=0; i<nthread_yoyu; i++ )
+ {
+ pthread_join( handle[i], NULL );
+ }
+ pthread_mutex_destroy( &mutex );
+ pthread_cond_destroy( &treecond );
+ free( handle );
+ free( targ );
+ free( fftlog );
+
+// reporterr( "after treebasethread, " );
+// use_getrusage();
+ }
+ else
+#endif
+ {
+#if 0
+ if( calcpairdists ) // except for last
+ {
+ if( treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, mtx, selfscore, &alloclen, callback ) ) goto chudan;
+ }
+ else
+#endif
+ {
+// if( treebase( keeplength && (iguidetree==nguidetree-1), nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, NULL, NULL, deletemap, deletelag, &alloclen, callback ) ) goto chudan;
+ if( treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, NULL, NULL, &alloclen, callback ) ) goto chudan;
+ }
+ }
+// reporterr( "after treebase, " );
+// use_getrusage();
+ reporterr( "\ndone.\n\n" );
+ if( callback && callback( 0, 100, "Progressive alignment" ) ) goto chudan;
+// free( topol[njob-1][0] ); topol[njob-1][0]=NULL;
+// free( topol[njob-1][1] ); topol[njob-1][1]=NULL;
+// free( topol[njob-1] ); topol[njob-1]=NULL;
+// free( topol ); topol=NULL;
+ FreeIntCub( topol ); topol = NULL;
+// reporterr( "after freeing topol, " );
+// use_getrusage();
- fprintf( stderr, "Progressive alignment ... \n" );
+// reporterr( "compacttree = %d, calcpairdist = %d\n", compacttree, calcpairdists );
+
+
+// reporterr( "\nbseq[njob-3] = %s\n", bseq[njob-3] );
+// reporterr( "bseq[njob-2] = %s\n", bseq[njob-2] );
+// reporterr( "bseq[njob-1] = %s\n", bseq[njob-1] );
+
+
+
+// Distance matrix from MSA SEPARATELYCALCPAIRDISTS
+// if( iguidetree < nguidetree-1 )
#ifdef enablemultithread
- if( nthread > 0 && nadd == 0 )
- {
- treebasethread_arg_t *targ;
- int jobpos;
- pthread_t *handle;
- pthread_mutex_t mutex;
- pthread_cond_t treecond;
- int *fftlog;
- int nrun;
- int nthread_yoyu;
-
- nthread_yoyu = nthread * 1;
- nrun = 0;
- jobpos = 0;
- targ = calloc( nthread_yoyu, sizeof( treebasethread_arg_t ) );
- fftlog = AllocateIntVec( njob );
- handle = calloc( nthread_yoyu, sizeof( pthread_t ) );
- pthread_mutex_init( &mutex, NULL );
- pthread_cond_init( &treecond, NULL );
-
- for( i=0; i<njob; i++ ) dep[i].done = 0;
- for( i=0; i<njob; i++ ) fftlog[i] = 1;
-
- for( i=0; i<nthread_yoyu; i++ )
- {
- targ[i].thread_no = i;
- targ[i].njob = njob;
- targ[i].nrunpt = &nrun;
- targ[i].nlen = nlen;
- targ[i].jobpospt = &jobpos;
- targ[i].topol = topol;
- targ[i].dep = dep;
- targ[i].aseq = bseq;
- targ[i].effarr = eff;
- targ[i].alloclenpt = &alloclen;
- targ[i].fftlog = fftlog;
- targ[i].mutex = &mutex;
- targ[i].treecond = &treecond;
-
- pthread_create( handle+i, NULL, treebasethread, (void *)(targ+i) );
+// if( nthread>0 && nadd==0 ) if( calcpairdists )
+ if( calcpairdists && !compacttree )
+#else
+// if( 0 && nadd==0 ) if( calcpairdists ) // zettai nai
+ if( calcpairdists && !compacttree )
+#endif
+ {
+ reporterr( "Making a distance matrix from msa.. \n" );
+ skiptable = AllocateIntMtx( njob, 0 );
+ makeskiptable( njob, skiptable, bseq ); // allocate suru.
+#ifdef enablemultithread
+ if( nthread > 0 )
+ {
+ msadistmtxthread_arg_t *targ;
+ Jobtable jobpos;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+
+ jobpos.i = 0;
+ jobpos.j = 0;
+
+ targ = calloc( nthread, sizeof( msadistmtxthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+
+ for( i=0; i<nthread; i++ )
+ {
+ targ[i].thread_no = i;
+ targ[i].njob = njob;
+ targ[i].selfscore = selfscore;
+ targ[i].iscore = mtx;
+ targ[i].seq = bseq;
+ targ[i].skiptable = skiptable;
+ targ[i].jobpospt = &jobpos;
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, msadistmtxthread, (void *)(targ+i) );
+ }
+
+ for( i=0; i<nthread; i++ )
+ {
+ pthread_join( handle[i], NULL );
+ }
+ pthread_mutex_destroy( &mutex );
+ free( handle );
+ free( targ );
+ }
+ else
+#endif
+ {
+// reporterr( "Check source!\n" );
+// exit( 1 );
+
+#if 1
+ msadistmtxthread_arg_t *targ;
+ Jobtable jobpos;
+
+ jobpos.i = 0;
+ jobpos.j = 0;
+
+ targ = calloc( 1, sizeof( msadistmtxthread_arg_t ) );
+
+ {
+ targ[0].thread_no = 0;
+ targ[0].njob = njob;
+ targ[0].selfscore = selfscore;
+ targ[0].iscore = mtx;
+ targ[0].seq = bseq;
+ targ[0].skiptable = skiptable;
+ targ[0].jobpospt = &jobpos;
+
+ msadistmtxthread( targ );
+ }
+
+ free( targ );
+#endif
+ }
+ if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL;
+ reporterr( "\ndone.\n\n" );
+ free( selfscore ); selfscore = NULL;
+ FreeCharMtx( bseq ); bseq = NULL;
}
-
- for( i=0; i<nthread_yoyu; i++ )
+ else if( calcpairdists && compacttree )
{
- pthread_join( handle[i], NULL );
+ reporterr( "Making a compact tree from msa, step 1.. \n" );
+ skiptable = AllocateIntMtx( njob, 0 );
+ makeskiptable( njob, skiptable, bseq ); // allocate suru.
+ mindistfrom = (int *)calloc( njob, sizeof( int ) );
+ mindist = (double *)calloc( njob, sizeof( double ) );
+ partmtx = preparepartmtx( njob );
+#ifdef enablemultithread
+ if( nthread > 0 )
+ {
+ msacompactdistmtxthread_arg_t *targ;
+ int jobpos;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+ double **mindistthread;
+ int **mindistfromthread;
+
+ mindistthread = AllocateDoubleMtx( nthread, njob );
+ mindistfromthread = AllocateIntMtx( nthread, njob );
+ targ = calloc( nthread, sizeof( msacompactdistmtxthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+ jobpos = 0;
+
+ for( i=0; i<nthread; i++ )
+ {
+ for( j=0; j<njob; j++ )
+ {
+ mindistthread[i][j] = 999.9;
+ mindistfromthread[i][j] = -1;
+ }
+ targ[i].thread_no = i;
+ targ[i].njob = njob;
+ targ[i].selfscore = selfscore;
+ targ[i].partmtx = partmtx;
+ targ[i].seq = bseq;
+ targ[i].skiptable = skiptable;
+ targ[i].jobpospt = &jobpos;
+ targ[i].mindistfrom = mindistfromthread[i];
+ targ[i].mindist = mindistthread[i];
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, msacompactdisthalfmtxthread, (void *)(targ+i) );
+ }
+
+ for( i=0; i<nthread; i++ ) pthread_join( handle[i], NULL );
+ pthread_mutex_destroy( &mutex );
+
+ for( i=0; i<njob; i++ )
+ {
+ mindist[i] = 999.9;
+ mindistfrom[i] = -1;
+ for( j=0; j<nthread; j++ )
+ {
+ if( mindistthread[j][i] < mindist[i] )
+ {
+ mindist[i] = mindistthread[j][i];
+ mindistfrom[i] = mindistfromthread[j][i];
+ }
+ }
+ }
+ for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
+
+ free( handle );
+ free( targ );
+ FreeDoubleMtx( mindistthread );
+ FreeIntMtx( mindistfromthread );
+ }
+ else
+#endif
+ {
+ msacompactdistmtxthread_arg_t *targ;
+ int jobpos;
+ jobpos = 0;
+ targ = calloc( 1, sizeof( msacompactdistmtxthread_arg_t ) );
+
+ {
+ for( j=0; j<njob; j++ )
+ {
+ mindist[j] = 999.9;
+ mindistfrom[j] = -1;
+ }
+ targ[0].thread_no = 0;
+ targ[0].njob = njob;
+ targ[0].selfscore = selfscore;
+ targ[0].partmtx = partmtx;
+ targ[0].seq = bseq;
+ targ[0].skiptable = skiptable;
+ targ[0].jobpospt = &jobpos;
+ targ[0].mindistfrom = mindistfrom;
+ targ[0].mindist = mindist;
+
+ msacompactdisthalfmtxthread( targ );
+// msacompactdistmtxthread( targ );
+ }
+ free( targ );
+ for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
+ }
+// free( selfscore ); selfscore = NULL; // mada tsukau
+// FreeCharMtx( bseq ); bseq = NULL; // mada tsukau
+// if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL;
+
+// for( i=0; i<njob; i++ ) printf( "mindist[%d] = %f\n", i, mindist[i] );
+// exit( 1 );
}
- pthread_mutex_destroy( &mutex );
- pthread_cond_destroy( &treecond );
- free( handle );
- free( targ );
- free( fftlog );
+// Distance matrix from MSA end
+// reporterr( "at the end of guidetree loop, " );
+// use_getrusage();
+
}
- else
+#if DEBUG
+ reporterr( "closing trap_g\n" );
#endif
+// fclose( trap_g );
+// reporterr( "after guidetree loop, " );
+// use_getrusage();
+
+ if( keeplength )
{
- treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, eff, &alloclen );
+
+ dlf = fopen( "_deletelist", "w" );
+ deletelist = (int **)calloc( nadd+1, sizeof( int * ) );
+ for( i=0; i<nadd; i++ )
+ {
+ deletelist[i] = calloc( 1, sizeof( int ) );
+ deletelist[i][0] = -1;
+ }
+ deletelist[nadd] = NULL;
+ ndeleted = deletenewinsertions_whole( njob-nadd, nadd, bseq, bseq+njob-nadd, deletelist );
+
+ for( i=0; i<nadd; i++ )
+ {
+ if( deletelist[i] )
+ for( j=0; deletelist[i][j]!=-1; j++ )
+ fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
+ }
+ fclose( dlf );
+
+ restoreoriginalgaps( njob, bseq, originalgaps );
+
+ if( mapout )
+ {
+ dlf = fopen( "_deletemap", "w" );
+ reconstructdeletemap( nadd, addbk, deletelist, bseq+njob-nadd, dlf, name+njob-nadd );
+ FreeCharMtx( addbk );
+ addbk = NULL;
+ fclose( dlf );
+ }
+
+ FreeIntMtx( deletelist );
+ deletelist = NULL;
}
- fprintf( stderr, "\ndone.\n\n" );
-#if DEBUG
- fprintf( stderr, "closing trap_g\n" );
-#endif
- fclose( trap_g );
if( scoreout )
{
unweightedspscore = plainscore( njob, bseq );
- fprintf( stderr, "\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );
- fprintf( stderr, "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );
- fprintf( stderr, "\n\n" );
+ reporterr( "\nSCORE %s = %.0f, ", "(treebase)", unweightedspscore );
+ reporterr( "SCORE / residue = %f", unweightedspscore / ( njob * strlen( bseq[0] ) ) );
+ reporterr( "\n\n" );
}
- free( mergeoralign );
-// writePre( njob, name, nlen, aseq, !contin );
#if DEBUG
- fprintf( stderr, "writing alignment to stdout\n" );
-#endif
- writeData_pointer( stdout, njob, name, nlen, bseq );
-#if 0
- writeData( stdout, njob, name, nlen, bseq );
+ reporterr( "writing alignment to stdout\n" );
#endif
-#if IODEBUG
- fprintf( stderr, "OSHIMAI\n" );
+
+
+ val = 0;
+ if( ngui )
+ {
+ ien = strlen( bseq[0] );
+ if( ien > lgui )
+ {
+ reporterr( "alignmentlength = %d, gui allocated %d", ien, lgui );
+ val = GUI_LENGTHOVER;
+ }
+ else
+ {
+ for( i=0; i<njob; i++ )
+ {
+#if 1
+ strcpy( seqgui[i], bseq[i] );
+#else
+ free( seqgui[i] );
+ seqgui[i] = bseq[i];
#endif
+ }
+ }
+ }
+ else
+ {
+ writeData_pointer( stdout, njob, name, nlen, bseq );
+ }
+
+ if( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\n", sumofpairsscore( njob, bseq ) );
SHOWVERSION;
- return( 0 );
+ if( ndeleted > 0 )
+ {
+ reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted );
+ if( mapout )
+ reporterr( "The deleted letters are shown in the (filename).map file.\n" );
+ else
+ reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" );
+ }
+
+
+
+ if( subalignment )
+ {
+ FreeIntMtx( subtable );
+ free( insubtable );
+ for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );
+ free( subalnpt );
+ free( preservegaps );
+ }
+
+
+#if 1 // seqgui[i] = bseq[i] no toki bseq ha free shinai
+ FreeCharMtx( bseq );
+#endif
+ FreeCharMtx( name );
+ free( nlen );
+
+ free( mergeoralign );
+ FreeCharMtx( seq );
+ free( nogaplen );
+
+ free( mseq1 );
+ free( mseq2 );
+// FreeIntCub( topol ); //
+// FreeFloatMtx( len ); //
+// free( mergeoralign ); //
+ free( dep );
+
+ if( nadd ) free( addmem );
+ FreeIntMtx( localmem );
+ free( eff );
+ freeconstants();
+ closeFiles();
+ FreeCommonIP();
+ if( originalgaps ) free( originalgaps ); originalgaps = NULL;
+ if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
+
+// use_getrusage();
+
+ return( val );
+
+chudan:
+
+ if( nlen ) free( nlen ); nlen = NULL;
+ if( seq ) FreeCharMtx( seq ); seq = NULL;
+ if( mseq1 ) free( mseq1 ); mseq1 = NULL;
+ if( mseq2 ) free( mseq2 ); mseq2 = NULL;
+// if( topol )
+// {
+// for( i=0; i<njob; i++ )
+// {
+// if( topol[i] && topol[i][0] )
+// {
+// free( topol[i][0] ); topol[i][0] = NULL;
+// }
+// if( topol[i] && topol[i][1] )
+// {
+// free( topol[i][1] ); topol[i][1] = NULL;
+// }
+// if( topol[i] ) free( topol[i] ); topol[i] = NULL;
+// }
+// free( topol ); topol = NULL;
+// }
+ if( topol ) FreeIntCub( topol ); topol = NULL;
+ if( len ) FreeFloatMtx( len ); len = NULL;
+ if( eff ) free( eff ); eff = NULL;
+ if( mergeoralign ) free( mergeoralign ); mergeoralign = NULL;
+ if( dep ) free( dep ); dep = NULL;
+ if( addmem ) free( addmem ); addmem = NULL;
+ if( localmem ) FreeIntMtx( localmem ); localmem = NULL;
+ if( name ) FreeCharMtx( name ); name = NULL;
+ if( nogaplen ) free( nogaplen ); nogaplen = NULL;
+
+ if( tmpseq ) free( tmpseq ); tmpseq = NULL;
+ if( grpseq ) free( grpseq ); grpseq = NULL;
+ if( pointt ) FreeIntMtx( pointt ); pointt = NULL;
+ if( mtx ) FreeFloatHalfMtx( mtx, njob ); mtx = NULL;
+ if( table1 ) free( table1 ); table1 = NULL;
+
+ if( bseq ) FreeCharMtx( bseq ); bseq = NULL;
+ if( selfscore ) free( selfscore ); selfscore = NULL;
+ if( skiptable ) FreeIntMtx( skiptable ); skiptable = NULL;
+ if( originalgaps ) free( originalgaps ); originalgaps = NULL;
+ if( deletelist ) FreeIntMtx( deletelist ); deletelist = NULL;
+
+
+ if( subtable ) FreeIntMtx( subtable ); subtable = NULL;
+ if( insubtable ) free( insubtable ); insubtable = NULL;
+ for( i=0; i<nsubalignments; i++ )
+ {
+ if( subalnpt[i] ) free( subalnpt[i] ); subalnpt[i] = NULL;
+ }
+ if( subalnpt ) free( subalnpt ); subalnpt = NULL;
+ if( preservegaps ) free( preservegaps ); preservegaps = NULL;
+
+
+ if( mindistfrom ) free( mindistfrom ); mindistfrom = NULL;
+ if( mindist ) free( mindist ); mindist = NULL;
+
+ freeconstants();
+ closeFiles();
+ FreeCommonIP();
+
+ return( GUI_CANCEL );
}
+/*
+ * Command-line entry point: hand the process arguments to disttbfast()
+ * and use its result as the exit status.  The remaining arguments are
+ * passed as fixed 0 / NULL values from here.
+ */
+int main( int argc, char **argv )
+{
+ int status;
+
+ status = disttbfast( 0, 0, NULL, NULL, argc, argv, NULL );
+
+ /*
+  * disttbfast() returns GUI_CANCEL whenever it leaves through its
+  * "chudan" label, and the tree-output path uses that label for a
+  * normal early exit, so GUI_CANCEL is reported as success (0).
+  * NOTE(review): some error paths in disttbfast() also jump to that
+  * label, so they end up with exit status 0 as well -- confirm this
+  * is intended.
+  */
+ return ( status == GUI_CANCEL ) ? 0 : status;
+}
FILE *hat3p;
char **seq = NULL; // by D.Mathog
char **seq1;
- static char name[M][B];
- static char name1[M][B];
+ static char **name;
+ static char **name1;
static int nlen1[M];
double **mtx;
double **mtx2;
rewind( infp );
+ name = AllocateCharMtx( njob, B+1 );
+ name1 = AllocateCharMtx( njob, B+1 );
seq = AllocateCharMtx( njob, nlenmax+1 );
seq1 = AllocateCharMtx( 2, nlenmax+1 );
mtx = AllocateDoubleMtx( njob, njob );
#if 0
FRead( infp, name, nlen, seq );
#else
- readData( infp, name, nlen, seq );
+ readData_pointer( infp, name, nlen, seq );
#endif
fclose( infp );
hat2p = fopen( hat2file, "w" );
if( !hat2p ) ErrorExit( "Cannot open hat2." );
- WriteHat2( hat2p, njob, name, mtx2 );
+ WriteHat2_pointer( hat2p, njob, name, mtx2 );
fclose( hat2p );
FILE *infp;
char **seq = NULL; // by D.Mathog
char **seq1;
- static char name[M][B];
- static char name1[M][B];
+ char **name;
+ char **name1;
static int nlen1[M];
double **mtx;
double **mtx2;
static int nlen[M];
- char b[B];
+ static char b[B];
double max;
char com[1000];
int opt[M];
rewind( infp );
+ name = AllocateCharMtx( njob, B+1 );
+ name1 = AllocateCharMtx( njob, B+1 );
seq = AllocateCharMtx( njob, nlenmax+1 );
seq1 = AllocateCharMtx( 2, nlenmax+1 );
mtx = AllocateDoubleMtx( njob, njob );
#if 0
FRead( infp, name, nlen, seq );
#else
- readData( infp, name, nlen, seq );
+ readData_pointer( infp, name, nlen, seq );
#endif
fclose( infp );
hat2p = fopen( hat2file, "w" );
if( !hat2p ) ErrorExit( "Cannot open hat2." );
- WriteHat2( hat2p, njob, name, mtx2 );
+ WriteHat2_pointer( hat2p, njob, name, mtx2 );
fclose( hat2p );
#if 1
#define TEST 0
static int treeout = 0;
+static int maxdist = 1;
+static int nadd = 0;
+static int usenaivescoreinsteadofalignmentscore = 0;
#ifdef enablemultithread
typedef struct _jobtable
{
int njob;
int thread_no;
- float *selfscore;
+ double *selfscore;
double **mtx;
char **seq;
+ int **skiptable;
Jobtable *jobpospt;
pthread_mutex_t *mutex;
} thread_arg_t;
+#if 0
void *athread( void *arg )
{
thread_arg_t *targ = (thread_arg_t *)arg;
int njob = targ->njob;
int thread_no = targ->thread_no;
- float *selfscore = targ->selfscore;
+ double *selfscore = targ->selfscore;
double **mtx = targ->mtx;
char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
Jobtable *jobpospt = targ->jobpospt;
int i, j;
- float ssi, ssj, bunbo;
+ double ssi, ssj, bunbo;
+ double mtxv;
+
+ if( njob == 1 ) return( NULL );
while( 1 )
{
j = jobpospt->j;
i = jobpospt->i;
j++;
+// fprintf( stderr, "\n i=%d, j=%d before check\n", i, j );
if( j == njob )
{
+// fprintf( stderr, "\n j = %d, i = %d, njob = %d\n", j, i, njob );
fprintf( stderr, "%4d/%4d (thread %4d), dndpre\r", i+1, njob, thread_no );
i++;
j = i + 1;
if( i == njob-1 )
{
+// fprintf( stderr, "\n i=%d, njob-1=%d\n", i, njob-1 );
pthread_mutex_unlock( targ->mutex );
return( NULL );
}
}
+// fprintf( stderr, "\n i=%d, j=%d after check\n", i, j );
jobpospt->j = j;
jobpospt->i = i;
pthread_mutex_unlock( targ->mutex );
bunbo = MIN( ssi, ssj );
if( bunbo == 0.0 )
- mtx[i][j] = 1.0;
+ mtxv = maxdist;
else
- mtx[i][j] = 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / bunbo;
+ {
+// mtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / bunbo );
+ if( usenaivescoreinsteadofalignmentscore )
+ mtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], 0.0 ) / bunbo );
+ else
+ mtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty ) / bunbo );
+ }
+#if 1
+ if( mtxv < 0.0 )
+ {
+ reporterr( "WARNING: negative distance, mtxv = %f\n", mtxv );
+ mtxv = 0.0;
+ }
+
+ if( mtxv > 9.0 )
+ {
+ fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
+ exit( 1 );
+ }
+#else // CHUUI!!! 2012/05/16
+ if( mtxv > 2.0 )
+ {
+ mtxv = 2.0;
+ }
+ if( mtxv < 0.0 )
+ {
+ fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
+ exit( 1 );
+ }
+#endif
+ mtx[i][j] = mtxv;
+ }
+}
+#else
+void *athread( void *arg ) // thread routine: fills mtx[i][j] (j > i) with pairwise distances, one row i at a time
+{
+ thread_arg_t *targ = (thread_arg_t *)arg;
+ int njob = targ->njob;
+ int thread_no = targ->thread_no; // only used in the progress message below
+ double *selfscore = targ->selfscore;
+ double **mtx = targ->mtx;
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ Jobtable *jobpospt = targ->jobpospt; // row counter; read and advanced only while targ->mutex is held
+// arg must point to a thread_arg_t; every exit path returns NULL.
+ int i, j;
+ double ssi, ssj, bunbo;
+ double mtxv;
+// A single sequence has no pairs, so there is nothing to compute.
+ if( njob == 1 ) return( NULL );
+// Each pass claims one row index under the mutex, then computes that row without holding the lock.
+ while( 1 )
+ {
+ pthread_mutex_lock( targ->mutex );
+ i = jobpospt->i;
+ if( i == njob-1 ) // row njob-1 has no j > i, so every row has already been handed out
+ {
+ pthread_mutex_unlock( targ->mutex );
+ return( NULL );
+ }
+ jobpospt->i += 1; // the next claimant gets the following row
+ pthread_mutex_unlock( targ->mutex );
+ if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); // progress line on every 100th row
+
+
+ ssi = selfscore[i];
+ for( j=i+1; j<njob; j++ )
+ {
+ ssj = selfscore[j];
+
+ bunbo = MIN( ssi, ssj ); // denominator: the smaller of the two self scores
+ if( bunbo == 0.0 )
+ mtxv = maxdist; // skip the division when the denominator is zero
+ else
+ {
+// mtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / bunbo );
+ if( usenaivescoreinsteadofalignmentscore )
+ mtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], 0.0 ) / bunbo ); // same formula, last argument 0.0 instead of penalty
+ else
+ mtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty ) / bunbo );
+ }
+#if 1
+
+ if( mtxv < 0.0 )
+ {
+ reporterr( "WARNING: negative distance, mtxv = %f\n", mtxv );
+ mtxv = 0.0; // warn, then clamp to zero
+ }
+
+ if( mtxv > 9.9 )
+ {
+ fprintf( stderr, "WARNING: distance %d-%d is strange, %f.\n", i, j, mtxv );
+ mtxv = 9.9; // warn and clamp; the former exit is left commented out below
+// exit( 1 ); // 2016/Aug/3
+ }
+#else // CAUTION!!! 2012/05/16
+ if( mtxv > 2.0 )
+ {
+ mtxv = 2.0;
+ }
+ if( mtxv < 0.0 )
+ {
+ fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
+ exit( 1 );
+ }
+#endif
+ mtx[i][j] = mtxv;
+ }
 }
 }
+#endif
#endif
{
int c;
+ nadd = 0;
nthread = 1;
alg = 'X';
fmodel = 0;
poffset = NOTSPECIFIED; //?
kimuraR = NOTSPECIFIED;
pamN = NOTSPECIFIED;
+ usenaivescoreinsteadofalignmentscore = 0;
+ nwildcard = 0;
while( --argc > 0 && (*++argv)[0] == '-' )
{
{
switch( c )
{
+ case 'Z':
+ usenaivescoreinsteadofalignmentscore = 1;
+ break;
case 't':
treeout = '1';
break;
case 'P':
dorp = 'p';
break;
+ case ':':
+ nwildcard = 1;
+ break;
+ case 'K': // Hontou ha iranai. disttbfast.c, tbfast.c to awaserutame.
+ break;
+ case 'I':
+ nadd = myatoi( *++argv );
+ fprintf( stderr, "nadd = %d\n", nadd );
+ --argc;
+ goto nextoption;
+ case 'f':
+ ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
+ --argc;
+ goto nextoption;
+ case 'g':
+ ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
+ --argc;
+ goto nextoption;
+ case 'h':
+ poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
+ --argc;
+ goto nextoption;
+ case 'k':
+ kimuraR = myatoi( *++argv );
+// fprintf( stderr, "kimuraR = %d\n", kimuraR );
+ --argc;
+ goto nextoption;
+ case 'b':
+ nblosum = myatoi( *++argv );
+ scoremtx = 1;
+// fprintf( stderr, "blosum %d\n", nblosum );
+ --argc;
+ goto nextoption;
+ case 'j':
+ pamN = myatoi( *++argv );
+ scoremtx = 0;
+ TMorJTT = JTT;
+// fprintf( stderr, "jtt %d\n", pamN );
+ --argc;
+ goto nextoption;
+ case 'm':
+ pamN = myatoi( *++argv );
+ scoremtx = 0;
+ TMorJTT = TM;
+// fprintf( stderr, "TM %d\n", pamN );
+ --argc;
+ goto nextoption;
case 'i':
inputfile = *++argv;
- fprintf( stderr, "inputfile = %s\n", inputfile );
+// fprintf( stderr, "inputfile = %s\n", inputfile );
--argc;
goto nextoption;
+ case 'M':
+ maxdist = myatoi( *++argv );
+// fprintf( stderr, "maxdist = %d\n", maxdist );
+ --argc;
+ goto nextoption;
case 'C':
- nthread = atoi( *++argv );
- fprintf( stderr, "nthread = %d\n", nthread );
+ nthread = myatoi( *++argv );
+// fprintf( stderr, "nthread = %d\n", nthread );
--argc;
goto nextoption;
+ break;
}
}
nextoption:
int main( int argc, char **argv )
{
- int i, j;
+ int i, j, ilim;
char **seq;
static char **name;
- static int nlen[M];
- float *selfscore;
+ int *nlen;
+ double *selfscore;
double **mtx;
+ double mtxv;
FILE *fp;
FILE *infp;
- float ssi, ssj, bunbo;
+ double ssi, ssj, bunbo;
+ int **skiptable = NULL;
+ char c;
arguments( argc, argv );
#endif
rewind( infp );
+ njob -= nadd; // atarashii hairetsu ha mushi
+
seq = AllocateCharMtx( njob, nlenmax+1 );
name = AllocateCharMtx( njob, B+1 );
mtx = AllocateDoubleMtx( njob, njob );
selfscore = AllocateFloatVec( njob );
+ nlen = AllocateIntVec( njob );
+
#if 0
FRead( stdin, name, nlen, seq );
#endif
fclose( infp );
+
+ for( i=1; i<njob; i++ )
+ {
+ if( nlen[i] != nlen[0] )
+ {
+ reporterr( "Not aligned!\n" );
+ exit( 1 );
+ }
+ }
+
constants( njob, seq );
+ c = seqcheck( seq );
+ if( c )
+ {
+ reporterr( "Illegal character %c\n", c );
+ exit( 1 );
+ }
+
#if 0
for( i=0; i<njob-1; i++ )
{
#else // 061003
for( i=0; i<njob; i++ )
{
- selfscore[i] = (float)naivepairscore11( seq[i], seq[i], penalty );
-
+ selfscore[i] = (double)naivepairscore11( seq[i], seq[i], penalty );
}
+
+ skiptable = AllocateIntMtx( njob, 0 );
+ makeskiptable( njob, skiptable, seq ); // allocate suru.
+
#ifdef enablemultithread
if( nthread > 0 )
{
targ[i].selfscore = selfscore;
targ[i].mtx = mtx;
targ[i].seq = seq;
+ targ[i].skiptable = skiptable;
targ[i].jobpospt = &jobpos;
targ[i].mutex = &mutex;
else
#endif
{
- for( i=0; i<njob-1; i++ )
+ ilim = njob-1;
+ for( i=0; i<ilim; i++ )
{
ssi = selfscore[i];
fprintf( stderr, "%4d/%4d\r", i+1, njob );
ssj = selfscore[j];
bunbo = MIN( ssi, ssj );
if( bunbo == 0.0 )
- mtx[i][j] = 1.0;
+ mtxv = maxdist;
else
- mtx[i][j] = 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / bunbo;
-// mtx[i][j] = 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / MIN( selfscore[i], selfscore[j] );
-// fprintf( stderr, "i=%d,j=%d, l=%d### %f, score = %d\n", i, j, nlen[0], mtx[i][j], naivepairscore11( seq[i], seq[j], penalty ) );
+ {
+// reporterr( "usenaivescoreinsteadofalignmentscore = %d\n", usenaivescoreinsteadofalignmentscore );
+ if( usenaivescoreinsteadofalignmentscore ) // osoi.
+ mtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], 0.0 ) / bunbo );
+ else
+ mtxv = maxdist * ( 1.0 - (double)naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty ) / bunbo );
+// mtxv = maxdist * ( 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / bunbo );
+// mtxv = 1.0 - (double)naivepairscore11( seq[i], seq[j], penalty ) / MIN( selfscore[i], selfscore[j] );
+// fprintf( stderr, "i=%d,j=%d, l=%d### %f, score = %f, %f, %f\n", i, j, nlen[0], mtxv, naivepairscore11( seq[i], seq[j], penalty ), ssi, ssj );
+
+ }
+#if 1
+ if( mtxv < 0.0 )
+ {
+ reporterr( "WARNING: negative distance, mtxv = %f\n", mtxv );
+ mtxv = 0.0;
+ }
+// Clamp at 9.9, the same limit athread() applies in the threaded path.
+// Testing against 9.0 here raised every distance in (9.0, 9.9] to 9.9,
+// so the matrix depended on whether threads were used.
+ if( mtxv > 9.9 )
+ {
+ fprintf( stderr, "WARNING: Distance %d-%d is strange, %f.\n", i, j, mtxv );
+ mtxv = 9.9;
+// exit( 1 ); // 2016/Aug/3
+ }
+#else // CHUUI!!! 2012/05/16
+ if( mtxv > 2.0 )
+ {
+ mtxv = 2.0;
+ }
+ if( mtxv < 0.0 )
+ {
+ fprintf( stderr, "Distance %d-%d is strange, %f.\n", i, j, mtxv );
+ exit( 1 );
+ }
+#endif
+ mtx[i][j] = mtxv;
}
}
}
veryfastsupg_double_outtree( njob, mtx, topol, len );
}
#endif
+ if( skiptable ) FreeIntMtx( skiptable ); skiptable = NULL;
SHOWVERSION;
exit( 0 );
/*
extern char **seq_g;
extern char **res_g;
+static int subalignment;
+static int subalignmentoffset;
+static int specifictarget;
static int intop;
static int intree;
+static double autosubalignment;
+
+
+/*
+ * Walks the distance classes 0 .. ndistclass-1.  Class c is represented by
+ * the distance 2*c/ndistclass (range 0-2, as passed to dist2offset); the
+ * walk stops at the first class whose dist2offset() value is exactly 0.0.
+ * maxdistclass becomes that class index plus one (ndistclass+1 when no
+ * class gives 0.0), and both numbers are reported on stderr.
+ */
+static void calcmaxdistclass( void )
+{
+	int cls = 0;
+
+	while( cls < ndistclass )
+	{
+		double repdist = (double) 2 * cls / ndistclass;
+
+		if( dist2offset( repdist ) == 0.0 ) break;
+		cls++;
+	}
+
+	fprintf( stderr, "ndistclass = %d, maxdistclass = %d\n", ndistclass, cls+1 );
+	// CAUTION (kept from the original): the alternative
+	// "maxdistclass = ndistclass" is intentionally not used.
+	maxdistclass = cls + 1;
+}
void arguments( int argc, char *argv[] )
{
nthread = 1;
randomseed = 0;
scoreout = 0;
+ spscoreout = 0;
parallelizationstrategy = BAATARI1;
intop = 0;
intree = 0;
checkC = 0;
tbitr = 0;
treemethod = 'X';
+ sueff_global = 0.1;
scoremtx = 1;
dorp = NOTSPECIFIED;
ppenalty = NOTSPECIFIED;
+ penalty_shift_factor = 1000.0;
ppenalty_ex = NOTSPECIFIED;
poffset = NOTSPECIFIED;
kimuraR = NOTSPECIFIED;
TMorJTT = JTT;
consweight_multi = 1.0;
consweight_rna = 0.0;
+ subalignment = 0;
+ subalignmentoffset = 0;
+ legacygapcost = 0;
+ specificityconsideration = 0.0;
+ autosubalignment = 0.0;
+ specifictarget = 0;
+ nwildcard = 0;
while( --argc > 0 && (*++argv)[0] == '-' )
{
--argc;
goto nextoption;
case 'I':
- niter = atoi( *++argv );
+ niter = myatoi( *++argv );
fprintf( stderr, "niter = %d\n", niter );
--argc;
goto nextoption;
// fprintf( stderr, "ppenalty = %d\n", ppenalty );
--argc;
goto nextoption;
+ case 'Q':
+ penalty_shift_factor = atof( *++argv );
+ if( penalty_shift_factor < 100.0 && penalty_shift_factor != 2.0 )
+ {
+ fprintf( stderr, "%f, penalty_shift is fixed to penalty x 2 in the iterative refinement phase.\n", penalty_shift_factor );
+ penalty_shift_factor = 2.0;
+ }
+ --argc;
+ goto nextoption;
case 'g':
ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
// fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
+ kimuraR = myatoi( *++argv );
fprintf( stderr, "kappa = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = JTT;
fprintf( stderr, "jtt/kimura %d\n", pamN );
--argc;
goto nextoption;
case 'm':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = TM;
fprintf( stderr, "tm %d\n", pamN );
--argc;
goto nextoption;
case 'C':
- nthread = atoi( *++argv );
+ nthread = myatoi( *++argv );
fprintf( stderr, "nthread = %d\n", nthread );
--argc;
goto nextoption;
+ case 'H':
+ subalignment = 1;
+ subalignmentoffset = myatoi( *++argv );
+ --argc;
+ goto nextoption;
case 't':
- randomseed = atoi( *++argv );
+ randomseed = myatoi( *++argv );
fprintf( stderr, "randomseed = %d\n", randomseed );
--argc;
goto nextoption;
// exit( 1 );
--argc;
goto nextoption;
+ case 's':
+ specificityconsideration = (double)myatof( *++argv );
+// fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration );
+ --argc;
+ goto nextoption;
+#if 0
+ case 'S' :
+ scoreout = 1; // for checking parallel calculation
+ break;
+#else
case 'S' :
- scoreout = 1;
+ spscoreout = 1; // 2014/Dec/30, sp score
break;
+#endif
+#if 0
case 's' :
RNAscoremtx = 'r';
break;
+#endif
#if 1
case 'a':
fmodel = 1;
case 'P':
dorp = 'p';
break;
+#if 0
case 'Q':
alg = 'Q';
break;
+#endif
case 'R':
rnaprediction = 'r';
break;
case 's' :
treemethod = 's';
break;
-#endif
case 'H':
alg = 'H';
break;
+#endif
case 'A':
alg = 'A';
break;
case 'M':
alg = 'M';
break;
+ case '@':
+ alg = 'd';
+ break;
case 'F':
use_fft = 1;
break;
case 'J':
utree = 0;
break;
+#if 0
case 'd':
disp = 1;
break;
+#endif
case 'Z':
score_check = 0;
break;
case 'Y':
score_check = 2;
break;
+ case 'L':
+ legacygapcost = 1;
+ break;
#if 0
case 'n' :
treemethod = 'n';
case 'n' :
outnumber = 1;
break;
- case 'X' :
+ case 'X':
treemethod = 'X';
- break;
+ sueff_global = atof( *++argv );
+ fprintf( stderr, "sueff_global = %f\n", sueff_global );
+ --argc;
+ goto nextoption;
+#if 0
case 'E' :
treemethod = 'E';
break;
case 'q' :
treemethod = 'q';
break;
+#endif
+ case 'E':
+ autosubalignment = atof( *++argv );
+ fprintf( stderr, "autosubalignment = %f\n", autosubalignment );
+ --argc;
+ goto nextoption;
+ case 'W':
+ minimumweight = atof( *++argv );
+ fprintf( stderr, "minimumweight = %f\n", minimumweight );
+ --argc;
+ goto nextoption;
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
--argc;
goto nextoption;
+ case '=':
+ specifictarget = 1;
+ break;
+ case ':':
+ nwildcard = 1;
+ break;
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
double **eff;
FILE *prep;
FILE *infp;
+ FILE *orderfp;
int alloclen;
int returnvalue;
char c;
static char **nogap1seq;
static char *kozoarivec;
int nkozo;
+ int alignmentlength;
+ int **skipthisbranch;
+ int foundthebranch;
+ int nsubalignments, maxmem;
+ int **subtable;
+ int *insubtable;
+ int *preservegaps;
+ char ***subalnpt;
+ int ntarget, *targetmap, *targetmapr;
+ int ilim;
arguments( argc, argv );
#ifndef enablemultithread
nthread = 0;
#endif
+ if( fastathreshold < 0.0001 ) constraint = 0;
if( inputfile )
{
exit( 1 );
}
+
+ if( subalignment )
+ {
+ readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem );
+ fprintf( stderr, "nsubalignments = %d\n", nsubalignments );
+ fprintf( stderr, "maxmem = %d\n", maxmem );
+ subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
+ insubtable = AllocateIntVec( njob );
+ preservegaps = AllocateIntVec( njob );
+ for( i=0; i<njob; i++ ) insubtable[i] = 0;
+ for( i=0; i<njob; i++ ) preservegaps[i] = 0;
+ subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 );
+ readsubalignmentstable( njob, subtable, preservegaps, NULL, NULL );
+ for( i=0; i<nsubalignments; i++ ) for( j=0; j<insubtable[i]; j++ )
+ {
+ if( subtable[i][j] < 0 )
+ {
+ fprintf( stderr, "Not supported in the iterative refinmenment mode.\n" );
+ fprintf( stderr, "Please use a positive number to represent a sequence.\n" );
+ }
+ }
+ }
+
ocut = cut;
segment = (Segment *)calloc( MAXSEG, sizeof( Segment ) );
seq_g_bk = AllocateCharMtx( njob, 0 );
for( i=0; i<njob; i++ ) seq_g_bk[i] = seq_g[i];
kozoarivec = AllocateCharVec( njob );
+ skipthisbranch = AllocateIntMtx( njob, 2 );
+ for( i=0; i<njob; i++ ) skipthisbranch[i][0] = skipthisbranch[i][1] = 0;
+
+
+#if 0
+ Read( name, nlen, seq_g );
+#else
+ readData_pointer( infp, name, nlen, seq_g );
+#endif
+ fclose( infp );
+
+ if( specifictarget )
+ {
+ targetmap = calloc( njob, sizeof( int ) );
+ ntarget = 0;
+ for( i=0; i<njob; i++ )
+ {
+ targetmap[i] = -1;
+ if( !strncmp( name[i]+1, "_focus_", 7 ) )
+ targetmap[i] = ntarget++;
+ }
+ targetmapr = calloc( ntarget, sizeof( int ) );
+ for( i=0; i<njob; i++ )
+ if( targetmap[i] != -1 ) targetmapr[targetmap[i]] = i;
+ }
+ else
+ {
+ ntarget = njob;
+ targetmap = calloc( njob, sizeof( int ) );
+ targetmapr = calloc( njob, sizeof( int ) );
+ for( i=0; i<njob; i++ )
+ targetmap[i] = targetmapr[i] = i;
+ }
if( constraint )
{
- localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
- for( i=0; i<njob; i++)
+ ilim = njob;
+ localhomtable = (LocalHom **)calloc( ntarget, sizeof( LocalHom *) );
+ for( i=0; i<ntarget; i++)
{
- localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
- for( j=0; j<njob; j++)
+ localhomtable[i] = (LocalHom *)calloc( ilim, sizeof( LocalHom ) );
+ for( j=0; j<ilim; j++)
{
localhomtable[i][j].start1 = -1;
localhomtable[i][j].end1 = -1;
localhomtable[i][j].last = localhomtable[i]+j;
localhomtable[i][j].korh = 'h';
}
+ if( !specifictarget ) ilim--;
}
fprintf( stderr, "Loading 'hat3' ... " );
fflush( stderr );
prep = fopen( "hat3", "r" );
if( prep == NULL ) ErrorExit( "Make hat3." );
- readlocalhomtable2( prep, njob, localhomtable, kozoarivec );
+ if( specifictarget ) readlocalhomtable2_target( prep, njob, localhomtable, kozoarivec, targetmap );
+ else readlocalhomtable2_half( prep, njob, localhomtable, kozoarivec );
fclose( prep );
// for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
// fprintf( stdout, "%d %d %d %d %d %d %d\n", i, j, localhomtable[i][j].opt, localhomtable[i][j].start1, localhomtable[i][j].end1, localhomtable[i][j].start2, localhomtable[i][j].end2 );
}
}
-#if 0
- Read( name, nlen, seq_g );
-#else
- readData_pointer( infp, name, nlen, seq_g );
-#endif
- fclose( infp );
+ if( specificityconsideration ) calcmaxdistclass();
for( i=0; i<njob; i++ )
{
{
gappick0( nogap1seq[0], seq_g[i] );
nogaplen = strlen( nogap1seq[0] );
- singlerna[i] = (RNApair **)calloc( nogaplen, sizeof( RNApair * ) );
+ singlerna[i] = (RNApair **)calloc( nogaplen+1, sizeof( RNApair * ) );
for( j=0; j<nogaplen; j++ )
{
singlerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
singlerna[i][j][0].bestpos = -1;
singlerna[i][j][0].bestscore = -1.0;
}
+ singlerna[i][nogaplen] = NULL;
readmccaskill( prep, singlerna[i], nogaplen );
}
fclose( prep );
if( !prep ) ErrorExit( "Make hat2." );
readhat2_pointer( prep, njob, name, eff );
fclose( prep );
-#if DEBUG
+#if 0
+ fprintf( stderr, "eff = \n" );
for( i=0; i<njob-1; i++ )
{
for( j=i+1; j<njob; j++ )
{
- printf( " %f", eff[i][j] );
+ fprintf( stderr, "%d-%d, %f\n", i, j, eff[i][j] );
}
- printf( "\n" );
+ fprintf( stderr, "\n" );
}
#endif
if( intree )
- veryfastsupg_double_loadtree( njob, eff, topol, len );
+ {
+ veryfastsupg_double_loadtree( njob, eff, topol, len, name );
+#if 0
+ fprintf( stderr, "eff = \n" );
+ for( i=0; i<njob-1; i++ )
+ {
+ for( j=i+1; j<njob; j++ )
+ {
+ fprintf( stderr, "%d-%d, %f\n", i, j, eff[i][j] );
+ }
+ fprintf( stderr, "\n" );
+ }
+exit( 1 );
+#endif
+ }
else if( intop ) // v6.528 deha if( intop ) dattanode intree ga mukou datta.
- veryfastsupg_double_loadtop( njob, eff, topol, len );
+ {
+ fprintf( stderr, "--topin has been disabled\n" );
+ exit( 1 );
+// veryfastsupg_double_loadtop( njob, eff, topol, len );
+ }
+ else if( subalignment )
+ fixed_supg_double_treeout_constrained( njob, eff, topol, len, name, nsubalignments, subtable );
else if( treemethod == 'X' || treemethod == 'E' || treemethod == 'q' )
// veryfastsupg_double_outtree( njob, eff, topol, len, name );
fixed_musclesupg_double_treeout( njob, eff, topol, len, name );
printf( "utree = %d\n", utree );
#endif
+ if( autosubalignment > 0.0 && subalignment == 0 )
+ {
+// reporterr( "Computing skipthisbranch..\n" );
+ insubtable = AllocateIntVec( njob );
+ preservegaps = AllocateIntVec( njob );
+ subtable = calloc( 1, sizeof( char * ) );
+ subtable[0] = NULL; // for FreeIntMtx
+ for( i=0; i<njob; i++ ) insubtable[i] = 0;
+ for( i=0; i<njob; i++ ) preservegaps[i] = 0; // tsukawanaikamo
+ if( generatesubalignmentstable( njob, &subtable, &nsubalignments, &maxmem, topol, len, autosubalignment ) ) // subtable ha allocate sareru.
+ {
+ reporterr( "################################################################################################ \n" );
+ reporterr( "#\n" );
+ reporterr( "# WARNING: Iterative refinment was not done because you gave a large --fix value (%6.3f).\n", autosubalignment );
+ reporterr( "#\n" );
+ reporterr( "################################################################################################ \n" );
+ writePre( njob, name, nlen, seq_g, 1 );
+
+ FreeCharMtx( seq_g_bk );
+ FreeIntCub( topol );
+ FreeDoubleMtx( len );
+ FreeDoubleMtx( eff );
+ FreeIntMtx( skipthisbranch );
+ FreeIntMtx( subtable );
+ free( preservegaps );
+ free( insubtable );
+ SHOWVERSION;
+ return( 0 );
+ }
+// subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
+ fprintf( stderr, "nsubalignments = %d, maxmem = %d\n", nsubalignments, maxmem );
+ subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 );
+#if 0
+ for( i=0; i<nsubalignments; i++ )
+ {
+ reporterr( "subalignment %d\n", i );
+ for( j=0; subtable[i][j]!=-1; j++ )
+ {
+ reporterr( "%5d", subtable[i][j] );
+ }
+ reporterr( "\n" );
+ }
+#endif
+#if 0 // wakaran
+ for( i=0; i<nsubalignments; i++ ) for( j=0; j<insubtable[i]; j++ )
+ {
+ if( subtable[i][j] < 0 )
+ {
+ fprintf( stderr, "Not supported in the iterative refinmenment mode.\n" );
+ fprintf( stderr, "Please use a positive number to represent a sequence.\n" );
+ }
+ }
+#endif
+// reporterr( "done.\n" );
+ }
+
+
+ orderfp = fopen( "order", "w" );
+ if( !orderfp )
+ {
+ fprintf( stderr, "Cannot open 'order'\n" );
+ exit( 1 );
+ }
+ for( i=0; (j=topol[njob-2][0][i])!=-1; i++ )
+ {
+ fprintf( orderfp, "%d\n", j );
+ }
+ for( i=0; (j=topol[njob-2][1][i])!=-1; i++ )
+ {
+ fprintf( orderfp, "%d\n", j );
+ }
+ fclose( orderfp );
+
+
fprintf( stderr, "\n" );
if( ( !utree && kobetsubunkatsu ) || constraint || !bunkatsu )
#endif
}
+
+//--------------- kokokara ----
+ if( subalignment || autosubalignment )
+ {
+ for( i=0; i<nsubalignments; i++ )
+ {
+ fprintf( stderr, "\nChecking subalignment %d:\n", i+1 );
+ alignmentlength = strlen( seq[subtable[i][0]] );
+ for( j=0; subtable[i][j]!=-1; j++ )
+ fprintf( stderr, " %d ", subtable[i][j]+1 );
+// fprintf( stderr, " ##### %d-%d. %-30.30s\n", i, subtable[i][j]+1, name[subtable[i][j]]+1 );
+ fprintf( stderr, "\n" );
+ for( j=0; subtable[i][j]!=-1; j++ )
+ {
+ if( subtable[i][j] >= njob )
+ {
+ fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 );
+ exit( 1 );
+ }
+ if( alignmentlength != strlen( seq[subtable[i][j]] ) )
+ {
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "###############################################################################\n" );
+ fprintf( stderr, "# ERROR!\n" );
+ fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 );
+ fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" );
+ fprintf( stderr, "#\n" );
+ fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength );
+ fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) );
+ fprintf( stderr, "#\n" );
+ fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" );
+ if( subalignmentoffset )
+ {
+ fprintf( stderr, "#\n" );
+ fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
+ fprintf( stderr, "# In this case, the rule of numbering is:\n" );
+ fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
+ fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
+ }
+ fprintf( stderr, "###############################################################################\n" );
+ fprintf( stderr, "\n" );
+ exit( 1 );
+ }
+ insubtable[subtable[i][j]] = 1;
+ }
+ for( j=0; j<njob-1; j++ )
+ {
+#if 0
+ int k;
+ reporterr( "#### STEP%d\n", j );
+ for( k=0; topol[j][0][k]!=-1; k++ ) reporterr( "%3d ", topol[j][0][k] );
+ reporterr( "len=%f\n", len[j][0] );
+ for( k=0; topol[j][1][k]!=-1; k++ ) reporterr( "%3d ", topol[j][1][k] );
+ reporterr( "len=%f\n", len[j][1] );
+ reporterr( "\n" );
+#endif
+ if( includemember( topol[j][0], subtable[i] ) && !samemember( topol[j][0], subtable[i] ) )
+ {
+ skipthisbranch[j][0] = 1;
+// reporterr( "SKIP 0 !!!!!!\n" );
+ }
+ if( includemember( topol[j][1], subtable[i] ) && !samemember( topol[j][1], subtable[i] ) )
+ {
+ skipthisbranch[j][1] = 1;
+// reporterr( "SKIP 1 !!!!!!\n" );
+ }
+ }
+ foundthebranch = 0;
+ for( j=0; j<njob-1; j++ )
+ {
+ if( samemember( topol[j][0], subtable[i] ) || samemember( topol[j][1], subtable[i] ) )
+ {
+ foundthebranch = 1;
+ fprintf( stderr, " -> OK\n" );
+ break;
+ }
+ }
+ if( !foundthebranch )
+ {
+ system( "cp infile.tree GuideTree" ); // tekitou
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "###############################################################################\n" );
+ fprintf( stderr, "# ERROR!\n" );
+ fprintf( stderr, "# Subalignment %d does not seem to form a monophyletic cluster\n", i+1 );
+ fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" );
+ fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" );
+ fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" );
+ fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" );
+ if( subalignmentoffset )
+ {
+ fprintf( stderr, "#\n" );
+ fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
+ fprintf( stderr, "# In this case, the rule of numbering is:\n" );
+ fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
+ fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
+ }
+ fprintf( stderr, "############################################################################### \n" );
+ fprintf( stderr, "\n" );
+ exit( 1 );
+ }
+// commongappick( seq[subtable[i]], subalignment[i] ); // irukamo
+ }
+#if 0
+ for( i=0; i<njob-1; i++ )
+ {
+ fprintf( stderr, "STEP %d\n", i+1 );
+ fprintf( stderr, "group1 = " );
+ for( j=0; topol[i][0][j] != -1; j++ )
+ fprintf( stderr, "%d ", topol[i][0][j]+1 );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "SKIP -> %d\n\n", skipthisbranch[i][0] );
+ fprintf( stderr, "group2 = " );
+ for( j=0; topol[i][1][j] != -1; j++ )
+ fprintf( stderr, "%d ", topol[i][1][j]+1 );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "SKIP -> %d\n\n", skipthisbranch[i][1] );
+ }
+#endif
+
+ for( i=0; i<njob; i++ )
+ {
+ if( insubtable[i] ) strcpy( bseq[i], seq[i] );
+ else gappick0( bseq[i], seq[i] );
+ }
+
+ for( i=0; i<nsubalignments; i++ )
+ {
+ for( j=0; subtable[i][j]!=-1; j++ ) subalnpt[i][j] = bseq[subtable[i][j]];
+ commongappick( j, subalnpt[i] );
+ }
+
+ FreeIntMtx( subtable );
+ free( insubtable );
+ for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );
+ free( subalnpt );
+ free( preservegaps );
+ }
+//--------------- kokomade ----
+
+
+
+
for( i=0; i<njob; i++ ) res_g[i][0] = 0;
for( iseg=0; iseg<nseg-1; iseg++ )
fprintf( trap_g, "Segment %3d/%3d %4d-%4d\n", iseg+1, nseg-1, pos+1, pos+1+tmplen );
cut = ocut;
- returnvalue = TreeDependentIteration( njob, name, nlen, seq, bseq, topol, len, alloclen, localhomtable, singlerna, nkozo, kozoarivec );
+ returnvalue = TreeDependentIteration( njob, name, nlen, seq, bseq, topol, len, eff, skipthisbranch, alloclen, localhomtable, singlerna, nkozo, kozoarivec, ntarget, targetmap, targetmapr );
for( i=0; i<njob; i++ )
strcat( res_g[i], bseq[i] );
FreeIntCub( topol );
FreeDoubleMtx( len );
FreeDoubleMtx( eff );
- if( constraint ) FreeLocalHomTable( localhomtable, njob );
+ FreeIntMtx( skipthisbranch );
+ free( kozoarivec );
+ if( constraint )
+ {
+ if( specifictarget ) FreeLocalHomTable_part( localhomtable, ntarget, njob );
+ else FreeLocalHomTable_half( localhomtable, njob );
+ }
+ free( targetmap );
+ free( targetmapr );
+ if( rnakozo && rnaprediction == 'm' )
+ {
+ if( singlerna ) // nen no tame
+ {
+ for( i=0; i<njob; i++ )
+ {
+ for( j=0; singlerna[i][j]!=NULL; j++ )
+ {
+ if( singlerna[i][j] ) free( singlerna[i][j] );
+ }
+ if( singlerna[i] ) free( singlerna[i] );
+ }
+ free( singlerna );
+ singlerna = NULL;
+ }
+ }
#if 0
Write( stdout, njob, name, nlen, bseq );
fprintf( stderr, "done\n" );
fprintf( trap_g, "done\n" );
fclose( trap_g );
+ freeconstants();
devide = 0;
#endif
+ if( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\n", sumofpairsscore( njob, res_g ) );
+
SHOWVERSION;
return( 0 );
}
static char *orderfile;
static int format;
static int namelen;
+static int extendedalphabet;
static void fillspace( char *seq, int lenmax )
{
void setmark_clustal( int nlen, int nseq, char **seq, char *mark )
{
- int i, j, k;
+ int i, j, k, nalpha;
+ char firstletter;
+ char *strong[9];
+ char *weaker[11];
+ int nstrong, nweaker;
+ char s;
- char *strong[] = {
- "STA",
- "NEQK",
- "NHQK",
- "NDEQ",
- "QHRK",
- "MILV",
- "MILF",
- "HY",
- "FYW",
- };
- int nstrong = 9;
- char *weaker[] = {
- "CSA",
- "ATV",
- "SAG",
- "STNK",
- "STPA",
- "SGND",
- "SNDEQK",
- "NDEQHK",
- "NEQHRK",
- "FVLIM",
- "HFY",
- };
- int nweaker = 11;
+ if( dorp == 'd' )
+ {
+// Nucleotide groups: T/U count as a strong match; A/G, C/T and C/U as weak ones.
+ strong[0] = "TU";
+ nstrong = 1;
+ weaker[0] = "AG";
+ weaker[1] = "CT";
+ weaker[2] = "CU";
+// Three weak groups are filled in above; with a count of 2 the "CU" group was never tested.
+ nweaker = 3;
+ nalpha = 10;
+ }
+ else
+ {
+ strong[0] = "STA";
+ strong[1] = "NEQK";
+ strong[2] = "NHQK";
+ strong[3] = "NDEQ";
+ strong[4] = "QHRK";
+ strong[5] = "MILV";
+ strong[6] = "MILF";
+ strong[7] = "HY";
+ strong[8] = "FYW";
+ nstrong = 9;
+ weaker[0] = "CSA";
+ weaker[1] = "ATV";
+ weaker[2] = "SAG";
+ weaker[3] = "STNK";
+ weaker[4] = "STPA";
+ weaker[5] = "SGND";
+ weaker[6] = "SNDEQK";
+ weaker[7] = "NDEQHK";
+ weaker[8] = "NEQHRK";
+ weaker[9] = "FVLIM";
+ weaker[10] = "HFY";
+ nweaker = 11;
+ nalpha = 20;
+ }
for( i=0; i<nlen; i++ )
{
mark[i] = ' ';
for( j=0; j<nseq; j++ )
- if( '-' == seq[j][i] ) break;
- if( j != nseq )
{
- continue;
+ s = seq[j][i];
+ if( '-' == s || ' ' == s ) break;
}
- for( j=0; j<nseq; j++ )
- if( toupper( seq[0][i] ) != toupper( seq[j][i] ) ) break;
- if( j == nseq )
+ if( j != nseq )
{
- mark[i] = '*';
continue;
}
- for( k=0; k<nstrong; k++ )
+ if( extendedalphabet )
{
+ firstletter = seq[0][i];
+ if( amino_n[(unsigned char)firstletter] < 0 ) continue;
+
for( j=0; j<nseq; j++ )
+ if( seq[j][i] != firstletter ) break;
+ if( j == nseq )
{
- if( !strchr( strong[k], toupper( seq[j][i] ) ) ) break;
+ mark[i] = '*';
+ continue;
}
- if( j == nseq ) break;
- }
- if( k < nstrong )
- {
- mark[i] = ':';
- continue;
}
- for( k=0; k<nweaker; k++ )
+ else
{
+ firstletter = toupper( seq[0][i] );
+ if( amino_n[(unsigned char)firstletter] >= nalpha || amino_n[(unsigned char)firstletter] < 0 ) continue;
+
for( j=0; j<nseq; j++ )
+ if( toupper( seq[j][i] ) != firstletter ) break;
+ if( j == nseq )
{
- if( !strchr( weaker[k], toupper( seq[j][i] ) ) ) break;
+ mark[i] = '*';
+ continue;
+ }
+ for( k=0; k<nstrong; k++ )
+ {
+ for( j=0; j<nseq; j++ )
+ {
+ if( !strchr( strong[k], toupper( seq[j][i] ) ) ) break;
+ }
+ if( j == nseq ) break;
+ }
+ if( k < nstrong )
+ {
+ mark[i] = ':';
+ continue;
+ }
+ for( k=0; k<nweaker; k++ )
+ {
+ for( j=0; j<nseq; j++ )
+ {
+ if( !strchr( weaker[k], toupper( seq[j][i] ) ) ) break;
+ }
+ if( j == nseq ) break;
+ }
+ if( k < nweaker )
+ {
+ mark[i] = '.';
+ continue;
}
- if( j == nseq ) break;
- }
- if( k < nweaker )
- {
- mark[i] = '.';
- continue;
}
}
mark[nlen] = 0;
continue;
}
for( j=0; j<nseq; j++ )
- if( amino_grp[(int)seq[0][i]] != amino_grp[(int)seq[j][i]] ) break;
+ if( amino_grp[(unsigned char)seq[0][i]] != amino_grp[(unsigned char)seq[j][i]] ) break;
if( j == nseq )
{
mark[i] = '.';
void arguments( int argc, char *argv[] )
{
int c;
- namelen = 15;
+ namelen = -1;
scoremtx = 1;
nblosum = 62;
dorp = NOTSPECIFIED;
comment = NULL;
orderfile = NULL;
format = 'c';
+ extendedalphabet = 0;
while( --argc > 0 && (*++argv)[0] == '-' )
{
--argc;
goto nextoption;
case 'n':
- namelen = atoi( *++argv );
+ namelen = myatoi( *++argv );
fprintf( stderr, "namelen = %d\n", namelen );
--argc;
goto nextoption;
case 'y':
format = 'y';
break;
+ case 'E':
+ extendedalphabet = 1;
+ nblosum = -2;
+ break;
+ case 'N':
+ extendedalphabet = 0;
+ break;
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
if( orderfile )
{
orderfp = fopen( orderfile, "r" );
- if( !orderfile )
+ if( !orderfp )
{
fprintf( stderr, "Cannot open %s\n", orderfile );
exit( 1 );
else if( format == 'c' )
clustalout_pointer( stdout, njob, nlenmax, seq, name, mark, comment, order, namelen );
else if( format == 'y' )
- phylipout_pointer( stdout, njob, nlenmax, seq, name, order );
+ phylipout_pointer( stdout, njob, nlenmax, seq, name, order, namelen );
else
fprintf( stderr, "Unknown format\n" );
/*
from "C gengo niyoru saishin algorithm jiten" ISBN4-87408-414-1 Haruhiko Okumura
*/
-static void make_sintbl(int n, float sintbl[])
+static void make_sintbl(int n, double sintbl[])
{
int i, n2, n4, n8;
double c, s, dc, ds, t;
{
static TLS int last_n = 0; /* {\tt n} */
static TLS int *bitrev = NULL; /* */
- static TLS float *sintbl = NULL; /* */
+ static TLS double *sintbl = NULL; /* */
int i, j, k, ik, h, d, k2, n4, inverse;
- float t, s, c, dR, dI;
+ double t, s, c, dR, dI;
if (freeflag)
{
- if (bitrev) free(bitrev);
- if (sintbl) free(sintbl);
+ if (bitrev) free(bitrev); bitrev = NULL;
+ if (sintbl) free(sintbl); sintbl = NULL;
+ last_n = 0;
return( 0 );
}
bitrev = NULL;
}
if (n == 0) return 0; /* */
- sintbl = (float *)malloc((n + n4) * sizeof(float));
+ sintbl = (double *)malloc((n + n4) * sizeof(double));
bitrev = (int *)malloc(n * sizeof(int));
#else /* by T. Nishiyama */
- sintbl = realloc(sintbl, (n + n4) * sizeof(float));
+ sintbl = realloc(sintbl, (n + n4) * sizeof(double));
bitrev = realloc(bitrev, n * sizeof(int));
#endif
if (sintbl == NULL || bitrev == NULL) {
if( clus1 == 0 )
{
- FreeDoubleVec( stra ); stra = NULL;
- FreeDoubleVec( prf1 ); prf1 = NULL;
- FreeDoubleVec( prf2 ); prf2 = NULL;
- FreeIntVec( hat1 ); hat1 = NULL;
- FreeIntVec( hat2 ); hat2 = NULL;
+ if( stra ) FreeDoubleVec( stra ); stra = NULL;
+ if( prf1 ) FreeDoubleVec( prf1 ); prf1 = NULL;
+ if( prf2 ) FreeDoubleVec( prf2 ); prf2 = NULL;
+ if( hat1 ) FreeIntVec( hat1 ); hat1 = NULL;
+ if( hat2 ) FreeIntVec( hat2 ); hat2 = NULL;
+ alloclen = 0;
return( 0 );
}
if( prf1 == NULL )
{
- prf1 = AllocateDoubleVec( 26 );
- prf2 = AllocateDoubleVec( 26 );
- hat1 = AllocateIntVec( 27 );
- hat2 = AllocateIntVec( 27 );
+ prf1 = AllocateDoubleVec( nalphabets );
+ prf2 = AllocateDoubleVec( nalphabets );
+ hat1 = AllocateIntVec( nalphabets+1 );
+ hat2 = AllocateIntVec( nalphabets+1 );
}
len = MIN( strlen( seq1[0] ), strlen( seq2[0] ) );
for( i=0; i<len; i++ )
{
/* make prfs */
- for( j=0; j<26; j++ )
+ for( j=0; j<nalphabets; j++ )
{
prf1[j] = 0.0;
prf2[j] = 0.0;
j = clus1;
while( j-- ) prf1[amino_n[(*seq1pt++)[i]]] += *eff1pt++;
#else
- for( j=0; j<clus1; j++ ) prf1[amino_n[(int)seq1[j][i]]] += eff1[j];
+ for( j=0; j<clus1; j++ ) prf1[amino_n[(unsigned char)seq1[j][i]]] += eff1[j];
#endif
- for( j=0; j<clus2; j++ ) prf2[amino_n[(int)seq2[j][i]]] += eff2[j];
+ for( j=0; j<clus2; j++ ) prf2[amino_n[(unsigned char)seq2[j][i]]] += eff2[j];
/* make hats */
- pre1 = pre2 = 26;
- for( j=25; j>=0; j-- )
+ pre1 = pre2 = nalphabets;
+ /* Walk every profile slot (prf1/prf2 hold nalphabets entries), not just */
+ /* the first 26, so the hat linked lists that start at hat?[nalphabets] */
+ /* also reach symbols whose index is 26 or higher. */
+ for( j=nalphabets-1; j>=0; j-- )
{
if( prf1[j] )
/* make site score */
stra[i] = 0.0;
- for( k=hat1[26]; k!=-1; k=hat1[k] )
- for( j=hat2[26]; j!=-1; j=hat2[j] )
+ for( k=hat1[nalphabets]; k!=-1; k=hat1[k] )
+ for( j=hat2[nalphabets]; j!=-1; j=hat2[j] )
// stra[i] += n_dis[k][j] * prf1[k] * prf2[j];
stra[i] += n_disFFT[k][j] * prf1[k] * prf2[j];
stra[i] /= totaleff;
{
if( result1 )
{
- free( result1 );
- free( result2 );
- free( ocut1 );
- free( ocut2 );
- FreeIntMtx( track );
- FreeDoubleMtx( crossscore );
+ if( result1 ) free( result1 ); result1 = NULL;
+ if( result2 ) free( result2 ); result2 = NULL;
+ if( ocut1 ) free( ocut1 ); ocut1 = NULL;
+ if( ocut2 ) free( ocut2 ); ocut2 = NULL;
+ if( track ) FreeIntMtx( track ); track = NULL;
+ if( crossscore ) FreeDoubleMtx( crossscore ); crossscore = NULL;
}
+ crossscoresize = 0;
return;
}
--- /dev/null
+# csh script (foreach/end syntax): rewrite every *.c and *.h file in the
+# current directory in place, replacing each occurrence of the substring
+# "float" with "double".  The pattern is not word-bounded, so identifiers
+# that merely contain "float" are renamed as well.
+foreach i ( *.c *.h )
+ # Only replace the original when sed succeeded; otherwise a failed or
+ # partial conversion would overwrite the source file.
+ sed 's/float/double/g' "$i" > "$i.double" && mv "$i.double" "$i"
+ echo "$i"
+end
extern int intlen( int *num );
extern char seqcheck( char **seq );
-extern void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx );
+extern void scmx_calc( int icyc, char **aseq, double *effarr, double **scmx );
extern void exitall( char arr[] );
extern void display( char **seq, int nseq );
extern void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );
+//extern void intergroup_score_dynmtx( double **pairoffset, int mtx[0x80][0x80], char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );
+extern void intergroup_score_multimtx( int **whichmtx, double ***matrices, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );
extern void intergroup_score_gapnomi( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );
extern void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value );
extern double score_calc5( char **seq, int s, double **eff, int ex );
extern double score_calc4( char **seq, int s, double **eff, int ex );
extern void upg2( int nseq, double **eff, int ***topol, double **len );
-//extern void veryfastsupg_float_realloc_nobk_halfmtx( int njob, float **mtx, int ***topol, float **len );
-//extern void veryfastsupg_float_realloc_nobk( int njob, float **mtx, int ***topol, float **len );
+//extern void veryfastsupg_double_realloc_nobk_halfmtx( int njob, double **mtx, int ***topol, double **len );
+//extern void veryfastsupg_double_realloc_nobk( int njob, double **mtx, int ***topol, double **len );
extern void veryfastsupg_int_realloc_nobk( int njob, int **mtx, int ***topol, double **len );
extern void veryfastsupg( int nseq, double **oeff, int ***topol, double **len );
extern void veryfastsupg_double( int nseq, double **oeff, int ***topol, double **len );
-extern void veryfastsupg_double_loadtree( int nseq, double **oeff, int ***topol, double **len );
-extern void veryfastsupg_double_loadtop( int nseq, double **oeff, int ***topol, double **len );
+extern void veryfastsupg_double_loadtree( int nseq, double **oeff, int ***topol, double **len, char **name );
+//extern void veryfastsupg_double_loadtop( int nseq, double **oeff, int ***topol, double **len );
extern void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len );
extern void fastsupg( int nseq, double **oeff, int ***topol, double **len );
extern void supg( int nseq, double **oeff, int ***topol, double **len );
extern void countnode( int nseq, int ***topol, double **node );
extern void countnode_int( int nseq, int ***topol, int **node );
extern void counteff_simple( int nseq, int ***topol, double **len, double *node );
-extern void counteff_simple_float( int nseq, int ***topol, float **len, double *node );
+extern void counteff_simple_double( int nseq, int ***topol, double **len, double *node );
+extern void counteff_simple_double_nostatic( int nseq, int ***topol, double **len, double *node );
+extern void counteff_simple_double_nostatic_memsave( int nseq, int ***topol, double **len, Treedep *dep, double *node );
extern void counteff( int nseq, int ***topol, double **len, double **node );
-extern float score_calc1( char *seq1, char *seq2 );
-extern float score_calcp( char *seq1, char *seq2, int len );
-extern float substitution_nid( char *seq1, char *seq2 );
-extern float substitution_score( char *seq1, char *seq2 );
-extern float substitution_hosei( char *seq1, char *seq2 );
-extern float substitution( char *seq1, char *seq2 );
+extern double score_calc1( char *seq1, char *seq2 );
+extern double score_calcp( char *seq1, char *seq2, int len );
+extern double substitution_nid( char *seq1, char *seq2 );
+extern double substitution_score( char *seq1, char *seq2 );
+extern double substitution_hosei( char *seq1, char *seq2 );
+extern double substitution( char *seq1, char *seq2 );
extern void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff );
-extern float bscore_calc( char **seq, int s, double **eff );
+extern double bscore_calc( char **seq, int s, double **eff );
extern void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax );
extern void FreeTmpSeqs( char **mseq2, char *mseq1 );
extern void gappick_samestring( char *aseq );
double **eff, double *effarr );
extern void commongappick_record( int nseq, char **seq, int *map );
extern void commongappick( int nseq, char **seq );
+extern int commongapcount( int , int, char **, char ** );
+//extern void commongaprecord( int nseq, char **seq, char *originallygapped );
extern double score_calc0( char **seq, int s, double **eff, int ex );
extern void strins( char *str1, char *str2 );
extern int isaligned( int nseq, char **seq );
extern double score_calc_for_score( int nseq, char **seq );
-extern void floatncpy( float *vec1, float *vec2, int len );
-extern float score_calc_a( char **seq, int s, double **eff );
-extern float score_calc_s( char **seq, int s, double **eff );
+extern void doublencpy( double *vec1, double *vec2, int len );
+extern double score_calc_a( char **seq, int s, double **eff );
+extern double score_calc_s( char **seq, int s, double **eff );
extern double score_calc_for_score_s( int s, char **seq );
extern double SSPscore( int s, char **seq );
extern double DSPscore( int s, char **seq );
extern int searchAnchors( int nseq, char **seq, Segment *seg );
extern char *progName( char *str );
extern void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom );
+extern void dontcalcimportance_target( int nseq, double *eff, char **seq, LocalHom **localhom, int ntarget );
+extern void calcimportance_target( int nseq, int ntarget, double *eff, char **seq, LocalHom **localhom, int *targetmap, int *targetmapr );
+extern void dontcalcimportance_lastone( int nseq, double *eff, char **seq, LocalHom **localhom );
+extern void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom );
extern void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom );
+extern void calcimportance_half( int nseq, double *eff, char **seq, LocalHom **localhom );
extern void weightimportance2( int nseq, double *eff, LocalHom **localhom );
extern void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom );
extern void extendlocalhom( int nseq, LocalHom **localhom );
extern void extendlocalhom2( int nseq, LocalHom **localhom, double **mtx );
extern int makelocal( char *s1, char *s2, int thr );
extern void mdfymtx( char **pair, int s1, double **partialmtx, double **mtx );
-extern float score_calc( char **seq, int s );
-extern void cpmx_calc( char **seq, float **cpmx, double *eff, int lgth, int clus );
-extern void cpmx_calc_new( char **seq, float **cpmx, double *eff, int lgth, int clus );
-extern void MScpmx_calc_new( char **seq, float **cpmx, double *eff, int lgth, int clus );
+extern double score_calc( char **seq, int s );
+extern void cpmx_calc( char **seq, double **cpmx, double *eff, int lgth, int clus );
+extern void cpmx_calc_new( char **seq, double **cpmx, double *eff, int lgth, int clus );
+extern void cpmx_calc_add( char **seq, double **cpmx, double *eff, int lgth, int clus );
+extern void MScpmx_calc_new( char **seq, double **cpmx, double *eff, int lgth, int clus );
extern void mseqcat( char **seq1, char **seq2, double **eff, double *effarr1, double *effarr2, char name1[M][B], char name2[M][B], int clus1, int clus2 );
extern void strnbcat( char *s1, char *s2, int m );
extern int conjuctionforgaln( int s0, int s1, char **seq, char **aseq, double *peff, double *eff, char **name, char **aname, char *d );
extern int fastconjuction( int *memlist, char **seq, char **aseq, double *peff, double *eff, char name[M][B], char aname[M][B], char *d );
extern int fastconjuction_noname_kozo( int *memlist, char **seq, char **aseq, double *peff, double *eff, double *peff_kozo, double *eff_kozo, char *d );
-extern int fastconjuction_noname( int *memlist, char **seq, char **aseq, double *peff, double *eff, char *d );
+extern int fastconjuction_noname( int *memlist, char **seq, char **aseq, double *peff, double *eff, char *d, double mineff );
+extern int fastconjuction_target( int *memlist, char **seq, char **aseq, double *peff, double *eff, char *d, double mineff, int *targetmap );
extern int fastconjuction_noweight( int *memlist, char **seq, char **aseq, double *peff, char *d );
-extern int conjuctionfortbfast( char **pair, int s, char **seq, char **aseq, double *peff, double *eff, char *d );
-extern int conjuctionfortbfast_kozo( double *tmptmp, char **pair, int s, char **seq, char **aseq, double *peff, double *eff, double *peff_kozo, double *eff_kozo, char *d );
+extern int conjuctionfortbfast_old( char **pair, int s, char **seq, char **aseq, double *peff, double *eff, char *d );
extern int conjuction( char **pair, int s, char **seq, char **aseq, double *peff, double *eff, char **name, char **aname, char *d );
-extern void floatdelete( float **cpmx, int d, int len );
+extern void doubledelete( double **cpmx, int d, int len );
extern void chardelete( char *seq, int d );
extern int RootBranchNode( int nseq, int ***topol, int step, int branch );
extern void BranchLeafNode( int nseq, int ***topol, int *node, int step, int branch );
extern void RootLeafNode( int nseq, int ***topol, int *node );
extern void nodeFromABranch( int nseq, int *result, int **pairwisenode, int ***topol, double **len, int step, int num );
-extern void OneClusterAndTheOther( int locnjob, char **pair, int *s1, int *s2, int ***topol, int step, int branch );
+//extern void OneClusterAndTheOther( int locnjob, char **pair, int *s1, int *s2, int ***topol, int step, int branch );
+extern void OneClusterAndTheOther_fast( int locnjob, int *memlist1, int *memlist2, int *s1, int *s2, char *pairbuf, int ***topol, int step, int branch, double **smalldistmtx, double **distmtx, double *distontree );
extern void makeEffMtx( int nseq, double **mtx, double *vec );
extern void node_eff( int nseq, double *eff, int *node );
extern int shrinklocalhom( char **pair, int s1, int s2, LocalHom **localhom, LocalHom ***localhomshrink );
-extern int msshrinklocalhom( char **pair, int s1, int s2, LocalHom **localhom, LocalHom ***localhomshrink );
+extern int msshrinklocalhom_fast( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink );
+extern int msshrinklocalhom_fast_half( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink );
+extern int msshrinklocalhom_fast_target( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink, char *swaplist, int *targetmap );
extern int fastshrinklocalhom( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink );
+extern int fastshrinklocalhom_half( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink );
+extern int fastshrinklocalhom_target( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink, char *swaplist, int *targetmap );
+extern int fastshrinklocalhom_one( int *mem1, int *mem2, int norg, LocalHom **localhom, LocalHom ***localhomshrink );
extern int msfastshrinklocalhom( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink );
-extern int TreeDependentIteration( int locnjob, char **name, int nlen[M], char **aseq, char **bseq, int ***topol, double **len, int alloclen, LocalHom **localhomtable, RNApair ***single, int nkozo, char *kozoarivec );
+extern int TreeDependentIteration( int locnjob, char **name, int nlen[M], char **aseq, char **bseq, int ***topol, double **len, double **eff, int **skipthisbranch, int alloclen, LocalHom **localhomtable, RNApair ***single, int nkozo, char *kozoarivec, int ntarget, int *targetmap, int *targetmapr );
extern void checkMinusLength( int nseq, double **len );
extern void negativeMember2( int *mem, int *query, int locnseq );
extern int *negativeMember( int *query, int locnseq );
extern double calcW( Node *ob, Node *op );
extern void calcBranchWeight( double **bw, int locnseq, Node *stopol, int ***topol, double **len );
extern void branchWeightToPairWeight( int locnseq, int ***topol, double **pw, double **bw );
-extern void weightFromABranch_rec( double *result, Node *ob, Node *op );
extern void weightFromABranch( int nseq, double *result, Node *stopol, int ***topol, int step, int LorR );
+extern void distFromABranch( int nseq, double *result, Node *stopol, int ***topol, double **len, int step, int LorR );
extern void keika( char *str, int current, int all );
extern double maxItch( double *soukan, int size );
extern void calcNaiseki( Fukusosuu *value, Fukusosuu *x, Fukusosuu *y );
extern void blockAlign( int *cut1, int *cut2, double **ocrossscore, int *ncut );
extern void blockAlign2( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut );
extern void blockAlign3( int *cut1, int *cut2, Segment **seg1, Segment **seg2, double **ocrossscore, int *ncut );
-extern float A__align11( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch );
-extern float imp_match_out_scH( int i1, int j1 );
-extern void imp_match_init_strictH( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom, int forscore );
-extern float imp_match_out_scQ( int i1, int j1 );
-extern float imp_match_out_scR( int i1, int j1 );
-extern void imp_match_init_strictQ( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom, int forscore );
-extern void imp_match_init_strictR( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom, int forscore );
-extern void imp_match_init( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom );
-extern float MSalignmm( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, int *, int, int *, int headgp, int tailgp );
-extern float Lalignmm_hmout( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, float **map );
-extern float Lalign2m2m_hmout( char **seq1, char **seq2, char **seq1r, char **seq2r, char *dir1, char *dir2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, float **map );
-extern float MSalign11( char **seq1, char **seq2, int alloclen );
-//extern float rnalocal( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, RNApair **pair );
-extern float A__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2, int *, int, int *, int headgp, int tailgp );
-extern float H__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2 );
-extern float Q__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2 );
-extern float Q__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2, int *gapmap1, int *gapmap2 );
-extern float R__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2 );
-extern float R__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, char *gs1, char *gs2, char *ge1, char *ge2 );
-extern float A__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int *gapmap1, int *gapmap2 );
-extern float D__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int *gapmap1, int *gapmap2 );
-extern float translate_and_Calign( char **mseq1, char **mseq2, double *effarr1, double *effarr2, int clus1, int clus2, int alloclen );
-extern double Fgetlag( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen );
-extern float Falign( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen, int *fftlog, int *, int, int * );
-extern float Falign_udpari_long( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen, int *fftlog );
-float Falign_localhom( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen, LocalHom ***localhom, float *totalimpmatch, int *gapmap1, int *gapmap2, int *chudanpt, int chudanref, int *chudanres );
-extern float part_imp_match_out_sc( int i1, int j1 );
-extern float part_imp_match_out_scQ( int i1, int j1 );
-extern void part_imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, int forscore );
-extern void part_imp_match_init_strictQ( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom, int forscore );
-extern void part_imp_match_init( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom );
-extern float partA__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *, char *, char *, char *, int *, int, int * );
-extern float partQ__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *, char *, char *, char *);
-extern float G__align11( char **seq1, char **seq2, int alloclen, int headgp, int tailgp );
-extern float G__align11_noalign( int mtx[0x80][0x80], int penal, int penal_ex, char **seq1, char **seq2, int alloclen );
-extern float L__align11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt );
-extern float genL__align11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt );
-extern float genG__align11( char **seq1, char **seq2, int alloclen );
-extern float VAalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt );
-extern float suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt )
+extern double A__align11( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch );
+extern double imp_match_out_scD( int i1, int j1 );
+extern void imp_match_init_strictD( double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1kozo, double*eff2kozo, LocalHom ***localhom, char *swaplist, int forscore, int *memlist1, int *memlist2 );
+extern void imp_match_init( double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom );
+extern double MSalignmm( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, int *, int, int *, int headgp, int tailgp );
+extern double MSalignmm_variousdist( double **pairoffset, double ***matrices, double **dummtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, int *, int, int *, int headgp, int tailgp );
+extern double Lalignmm_hmout( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, double **map );
+extern double Lalign2m2m_hmout( char **seq1, char **seq2, char **seq1r, char **seq2r, char *dir1, char *dir2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, char *, char *, char *, char *, double **map );
+extern double MSalign11( char **seq1, char **seq2, int alloclen );
+//extern double rnalocal( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, RNApair **pair );
+extern double A__align( double **scoringmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *gs1, char *gs2, char *ge1, char *ge2, int *, int, int *, int headgp, int tailgp, int firstmem, int calledby );
+extern double A__align_variousdist( int **which, double ***scoringmatrices, double **dummtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *gs1, char *gs2, char *ge1, char *ge2, int *, int, int *, int headgp, int tailgp );
+extern double A__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int *gapmap1, int *gapmap2 );
+//extern double D__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int *gapmap1, int *gapmap2 );
+extern double translate_and_Calign( char **mseq1, char **mseq2, double *effarr1, double *effarr2, int clus1, int clus2, int alloclen );
+extern double Fgetlag( double **scoringmtx, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int alloclen );
+extern double Falign( int **whichmtx, double ***scoringmatrices, double **scoreingmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int clus1, int clus2, int alloclen, int *fftlog, int *, int, int * );
+extern double Falign_udpari_long( int **whichmtx, double ***scoringmatrices, double **scoringmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int clus1, int clus2, int alloclen, int *fftlog );
+double Falign_localhom( int **which, double ***scoringmatrices, double **scoreingmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int clus1, int clus2, int alloclen, LocalHom ***localhom, double *totalimpmatch, int *gapmap1, int *gapmap2, int *chudanpt, int chudanref, int *chudanres );
+extern double part_imp_match_out_sc( int i1, int j1 );
+extern void part_imp_match_init_strict( double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *memlist1, int *memlist2 );
+extern void part_imp_match_init( double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom );
+extern double partA__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *, char *, char *, char *, int *, int, int * );
+extern double partA__align_variousdist( int **which, double ***scoringmatrices, double **dummtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *, char *, char *, char *, int *, int, int * );
+extern double G__align11( double **scoringmtx, char **seq1, char **seq2, int alloclen, int headgp, int tailgp );
+extern double G__align11_noalign( double **scoringmtx, int penal, int penal_ex, char **seq1, char **seq2, int alloclen );
+extern double L__align11( double **scoringmtx, double scoreoffset, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt );
+extern double L__align11_noalign( double **scoringmtx, char **seq1, char **seq2 );
+extern double genL__align11( double **scoringmtx, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt );
+extern double genG__align11( char **seq1, char **seq2, int alloclen );
+extern double VAalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt );
+extern double suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt )
;
extern int fft(int n, Fukusosuu *x, int dum);
extern void topolcpy( int s1[], int s2[], int *mpt1, int *mpt2 );
extern void topolswap( int s1[], int s2[], int *mpt1, int *mpt2 );
extern void reduc( double **mtx, int nseq, int im, int jm );
extern void nj( int nseq, double **omtx, int ***topol, double **dis );
-extern void JTTmtx( double **rsr, double *freq, char locamino[26], char locgrp[26], int isTM );
-extern void BLOSUMmtx( int n, double **matrix, double *freq, char *amino, char *amino_grp );
-extern void putlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa );
-extern void putlocalhom_str( char *al1, char *al2, double *equiv, double scale, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa );
-extern void putlocalhom_ext( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa );
-extern void putlocalhom3( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa );
-extern void putlocalhom( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa );
+extern void JTTmtx( double **rsr, double *freq, unsigned char locamino[0x80], char locgrp[0x80], int isTM );
+extern void BLOSUMmtx( int n, double **matrix, double *freq, unsigned char *amino, char *amino_grp );
+extern void extendedmtx( double **matrix, double *freq, unsigned char *amino, char *amino_grp );
+extern void putlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh );
+extern void putlocalhom_str( char *al1, char *al2, double *equiv, double scale, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh );
+extern void putlocalhom_ext( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh );
+extern void putlocalhom3( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh );
+extern void putlocalhom( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh );
extern char *cutal( char *al, int al_display_start, int start, int end );
extern void ErrorExit( char *message );
extern void strncpy_caseC( char *str1, char *str2, int len );
extern int getaline_fp_eof( char *s, int l, FILE *fp );
extern int getaline_fp_eof_new(char s[], int l, FILE *fp);
extern int myfgets(char s[], int l, FILE *fp);
-extern float input_new( FILE *fp, int d );
+extern double input_new( FILE *fp, int d );
extern void PreRead( FILE *fp, int *locnjob, int *locnlenmax );
extern int allSpace( char *str );
extern void Read( char name[M][B], int nlen[M], char **seq );
extern void FRead( FILE *fp, char name[][B], int nlen[], char **seq );
extern void kake2hiku( char *str );
+extern int copydatafromgui( char **namegui, char **seqgui, char **name, int *nlen, char **seq );
extern void readDataforgaln( FILE *fp, char **name, int *nlen, char **seq );
extern void readData( FILE *fp, char name[][B], int nlen[], char **seq );
extern void readData_pointer_casepreserve( FILE *fp, char **name, int *nlen, char **seq );
extern void getnumlen( FILE *fp );
extern void getnumlen_casepreserve( FILE *fp, int *nlenmin );
extern void getnumlen_nogap( FILE *fp, int *nlenmin );
+extern void getnumlen_nogap_countn( FILE *fp, int *nlenmin, double *nfreq );
extern void WriteGapFill( FILE *fp, int locnjob, char name[][B], int nlen[M], char **aseq );
extern void writeDataforgaln( FILE *fp, int locnjob, char **name, int *nlen, char **aseq );
extern void writeData( FILE *fp, int locnjob, char name[][B], int nlen[], char **aseq );
extern void writeData_pointer( FILE *fp, int locnjob, char **name, int *nlen, char **aseq );
-extern void readhat2_floathalf( FILE *fp, int nseq, char name[M][B], float **mtx );
-extern void readhat2_floathalf_pointer( FILE *fp, int nseq, char **name, float **mtx );
-extern void readhat2_float( FILE *fp, int nseq, char name[M][B], float **mtx );
+extern void readhat2_doublehalf( FILE *fp, int nseq, char name[M][B], double **mtx );
+extern void readhat2_doublehalf_pointer( FILE *fp, int nseq, char **name, double **mtx );
+extern void readhat2_doublehalf_part_pointer( FILE *fp, int nseq, int nadd, char **name, double **mtx );
+extern void readhat2_double( FILE *fp, int nseq, char name[M][B], double **mtx );
extern void readhat2_int( FILE *fp, int nseq, char name[M][B], int **mtx );
extern void readhat2_pointer( FILE *fp, int nseq, char **name, double **mtx );
extern void readhat2( FILE *fp, int nseq, char name[M][B], double **mtx );
-extern void WriteFloatHat2_pointer_halfmtx( FILE *hat2p, int locnjob, char **name, float **mtx );
-extern void WriteFloatHat2( FILE *hat2p, int locnjob, char name[M][B], float **mtx );
+extern void WriteFloatHat2_pointer_halfmtx( FILE *hat2p, int locnjob, char **name, double **mtx );
+extern void WriteFloatHat2( FILE *hat2p, int locnjob, char name[M][B], double **mtx );
extern void WriteHat2_int( FILE *hat2p, int locnjob, char name[M][B], int **mtx );
extern void WriteHat2( FILE *hat2p, int locnjob, char name[M][B], double **mtx );
extern void WriteHat2_pointer( FILE *hat2p, int locnjob, char **name, double **mtx );
+extern void WriteHat2_part_pointer( FILE *hat2p, int locnjob, int nadd, char **name, double **mtx );
extern int ReadFasta_sub( FILE *fp, double *dis, int nseq, char name[M][B] );
extern int ReadSsearch( FILE *fp, double *dis, int nseq, char name[M][B] );
-extern int ReadBlastm7( FILE *fp, double *dis, int qmem, char name[M][B], LocalHom *localhomlist );
+extern int ReadBlastm7( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist );
extern int ReadBlastm7_scoreonly( FILE *fp, double *dis, int nin );
extern int ReadBlastm7_avscore( FILE *fp, double *dis, int nin );
-extern int ReadFasta34noalign( FILE *fp, double *dis, int qmem, char name[M][B], LocalHom *localhomlist );
-extern int ReadFasta34m10_nuc( FILE *fp, double *dis, int qmem, char name[M][B], LocalHom *localhomlist );
-extern int ReadFasta34m10( FILE *fp, double *dis, int qmem, char name[M][B], LocalHom *localhomlist );
+extern int ReadFasta34noalign( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist );
+extern int ReadFasta34m10_nuc( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist );
+extern int ReadFasta34m10( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist );
extern int ReadFasta34m10_scoreonly_nuc( FILE *fp, double *dis, int nin );
extern int ReadFasta34m10_scoreonly( FILE *fp, double *dis, int nin );
extern int ReadFasta34( FILE *fp, double *dis, int nseq, char name[M][B], LocalHom *localhomlist );
extern void readOtherOptions( int *ppidptr, int *fftThresholdptr, int *fftWinSizeptr );
extern void initSignalSM( void );
extern void initFiles( void );
-extern void WriteForFasta( FILE *fp, int locnjob, char name[][B], int nlen[M], char **aseq );
+extern void WriteForFasta( FILE *fp, int locnjob, char **name, int nlen[M], char **aseq );
extern void readlocalhomtable( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec );
+extern void readlocalhomtable_half( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec );
+extern void readlocalhomtable_target( FILE*fp, int nt, int njob, LocalHom **localhomtable, char *kozoarivec, int *targetmap );
extern void readlocalhomtable2( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec );
+extern void readlocalhomtable2_half( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec );
+extern void readlocalhomtable2_target( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec, int *targetmap );
+extern void readlocalhomtable_part( FILE*fp, int njob, int nadd, LocalHom **localhomtable, char *kozoarivec );
+extern void readlocalhomtable_two( FILE*fp, int njob, int nadd, LocalHom **localhomtable, LocalHom **localhomtablex, char *kozoarivec );
+extern void readlocalhomtable_one( FILE*fp, int njob, int nadd, LocalHom **localhomtable, char *kozoarivec );
extern void outlocalhom( LocalHom **localhom, int nseq );
+extern void outlocalhom_part( LocalHom **localhom, int norg, int nadd );
extern void outlocalhompt( LocalHom ***localhom, int n1, int n2 );
+extern void FreeLocalHomTable_half( LocalHom **localhomtable, int n ) ;
extern void FreeLocalHomTable( LocalHom **localhomtable, int n ) ;
+extern void FreeLocalHomTable_part( LocalHom **localhomtable, int n, int m ) ;
+extern void FreeLocalHomTable_two( LocalHom **localhomtable, int n, int m ) ;
+extern void FreeLocalHomTable_one( LocalHom **localhomtable, int n, int m ) ;
extern void constants( int nseq, char **seq );
extern void clustalout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, char *mark, char *comment, int *order, int namelen );
-extern void phylipout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, int *order );
+extern void phylipout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, int *order, int namelen );
extern void writeData_reorder( FILE *fp, int locnjob, char name[][B], int nlen[], char **aseq, int *order );
extern void writeData_reorder_pointer( FILE *fp, int locnjob, char **name, int *nlen, char **aseq, int *order );
extern int load1SeqWithoutName_new( FILE *fpp, char *cbuf );
extern char *load1SeqWithoutName_realloc( FILE *fpp );
extern char *load1SeqWithoutName_realloc_casepreserve( FILE *fpp );
-extern int disttbfast( char **in, int nlen[M], char name[M][B] );
extern void searchKUorWA( FILE *fp );
extern void gapireru( char *res, char *ori, char *gt );
extern int seqlen( char *seq );
-extern void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len );
-extern void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len );
-extern void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len );
-extern void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len );
-extern void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len );
-extern void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *s, char *e );
-extern void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *g );
-extern void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *g );
-extern void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *s, char *e );
-extern void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *g );
-extern void getgapfreq( float *freq, int clus, char **seq, double *eff, int len );
-extern void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len );
-//extern void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *s );
-extern void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *s );
-extern void getdiaminofreq_part( float *freq, int clus, char **seq, double *eff, int len, char *s, char *e );
-extern void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *s, char *e );
-extern void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len );
-extern void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len );
+extern void st_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len );
+extern void st_FinalGapAdd( double *fgcp, int clus, char **seq, double *eff, int len );
+extern void st_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len );
+extern void st_OpeningGapAdd( double *ogcp, int clus, char **seq, double *eff, int len );
+extern void st_FinalGapCount_zure( double *fgcp, int clus, char **seq, double *eff, int len );
+extern void getdiaminofreq_x( double *freq, int clus, char **seq, double *eff, int len );
+extern void new_FinalGapCount_zure( double *fgcp, int clus, char **seq, double *eff, int len, char *s, char *e );
+extern void new_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len, char *g );
+extern void new_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len, char *g );
+extern void new_OpeningGapCount_zure( double *ogcp, int clus, char **seq, double *eff, int len, char *s, char *e );
+extern void getGapPattern( double *fgcp, int clus, char **seq, double *eff, int len, char *g );
+extern void getgapfreq( double *freq, int clus, char **seq, double *eff, int len );
+extern void getgapfreq_zure( double *freq, int clus, char **seq, double *eff, int len );
+//extern void getgapfreq_zure_part( double *freq, int clus, char **seq, double *eff, int len, char *s );
+extern void getgapfreq_zure_part( double *freq, int clus, char **seq, double *eff, int len, char *s );
+extern void getdiaminofreq_part( double *freq, int clus, char **seq, double *eff, int len, char *s, char *e );
+extern void getdigapfreq_part( double *freq, int clus, char **seq, double *eff, int len, char *s, char *e );
+extern void getdiaminofreq_st( double *freq, int clus, char **seq, double *eff, int len );
+extern void getdigapfreq_st( double *freq, int clus, char **seq, double *eff, int len );
extern void st_getGapPattern( Gappat **gpat, int clus, char **seq, double *eff, int len );
extern void getkyokaigap( char *g, char **s, int pos, int n );
extern double *loadaamtx( void );
-extern float naivepairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
-extern float naivepairscore11( char *seq1, char *seq2, int penal );
-extern float naiveQpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
-extern float naiveRpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
-extern float naiveHpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
-extern void foldrna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, float **impmtx, int *gapmap1, int *gapmap2, RNApair *pair );
-extern void foldrna_gappick( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, float **impmtx, int *gapmap1, int *gapmap2, RNApair *pair );
+extern double naivepairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
+extern double naivepairscore11( char *seq1, char *seq2, int penal );
+extern double naivepairscore11_dynmtx( double **, char *seq1, char *seq2, int penal );
+extern double naivepairscorefast( char *seq1, char *seq2, int *skip1, int *skip2, int penal );
+extern double naiveQpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
+extern double naiveRpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
+extern double naiveHpairscore( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, int penal );
+extern void foldrna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, double **impmtx, int *gapmap1, int *gapmap2, RNApair *pair );
+extern void foldrna_gappick( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, double **impmtx, int *gapmap1, int *gapmap2, RNApair *pair );
extern void imp_rna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair );
-extern void imp_rnaQ( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair );
-extern void part_imp_rnaQ( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair );
+extern void imp_rnaD( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair );
extern void part_imp_rna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair );
-extern void imp_rnaQ_gappick( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***gr1, RNApair ***gr2, int *gapmap1, int *gapmap2, RNApair *pair );
extern void foldalignedrna( int clus1, int clus2, char **mseq1, char **mseq2, double *effarr1, double *effarr2, RNApair *rnapairboth );
void readmccaskill( FILE *fp, RNApair **pairprob, int length );
void makegrouprna( RNApair ***group, RNApair ***all, int *memlist );
-void makegrouprnait( RNApair ***group, RNApair ***all, char **pair, int s );
-extern void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len, Treedep * );
-extern void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep * );
-extern void loadtop( int nseq, float **eff, int ***topol, float **len );
-extern void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep * );
+void makegrouprnait( RNApair ***group, RNApair ***all, char *pair, int s );
+extern void fixed_musclesupg_double_realloc_nobk_halfmtx( int nseq, double **eff, int ***topol, double **len, Treedep *, int progressout, int efffree );
+extern void fixed_musclesupg_double_realloc_nobk_halfmtx_memsave( int nseq, double **eff, int ***topol, double **len, Treedep *, int progressout, int efffree );
+extern void loadtop( int nseq, double **mtx, int ***topol, double **len, char **name, int *nlen, Treedep * );
+extern void loadtree( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *, int treeout );
+extern int check_guidetreefile( int *seed, int *npick, double *limitram );
+extern void createchain( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int shuffle, int seed );
+//extern void loadtop( int nseq, double **eff, int ***topol, double **len );
+extern void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *, int efffree ); // KESU
+extern void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout_memsave( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *, int efffree );
+extern void fixed_supg_double_realloc_nobk_halfmtx_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *, int ncons, int **constraints, int efffree );
extern void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name );
-extern void imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1kozo, double*eff2kozo, LocalHom ***localhom, int forscore );
+extern void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ncons, int **constraints );
+extern void imp_match_init_strict( double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1kozo, double*eff2kozo, LocalHom ***localhom, char *swaplist, int forscore, int *memlist1, int *memlist2 );
extern void miyataout_reorder_pointer( FILE *fp, int locnjob, int nlenmax, char **name, int *nlen, char **aseq, int *order );
extern void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name );
-extern void cpmx_ribosum( char **seq, char **seqr, char *dir, float **cpmx, double *eff, int lgth, int clus );
+extern void cpmx_ribosum( char **seq, char **seqr, char *dir, double **cpmx, double *eff, int lgth, int clus );
extern void rnaalifoldcall( char **seq, int nseq, RNApair **pairprob );
extern void readpairfoldalign( FILE *fp, char *seq1, char *seq2, char *aln1, char *aln2, int q1, int q2, int *of1, int *of2, int sumlen );
extern void write1seq( FILE *fp, char *aseq );
extern void getnumlen_nogap_outallreg( FILE *fp, int *nlenminpt );
extern double plainscore( int nseq, char **seq );
extern void eq2dash( char *s );
-extern void findnewgaps( int n, char **seq, int *gaplen );
+extern void eq2dashmatometehayaku( char **s, int n );
+extern void findnewgaps( int n, int rep, char **seq, int *gaplen );
extern void findcommongaps( int, char **, int * );
extern void adjustgapmap( int, int *, char * );
-extern void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg );
-extern void restorecommongaps( int n, char **seq, int *top0, int *top1, int *gaplen, int alloclen );
+extern void insertnewgaps_bothorders( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int gapmaplen, int alloclen, char alg, char gapchar );
+extern void insertnewgaps( int njob, int *alreadyaligned, char **seq, int *ex1, int *ex2, int *gaplen, int *gapmap, int alloclen, char alg, char gapchar );
+extern void restorecommongaps( int n, int n0, char **seq, int *top0, int *top1, int *gaplen, int alloclen, char gapchar );
+extern void restorecommongapssmoothly( int n, int n0, char **seq, int *top0, int *top1, int *gaplen, int alloclen, char gapchar );
extern int samemember( int *mem, int *cand );
+extern int samemembern( int *mem, int *cand, int candn );
extern int includemember( int *mem, int *cand );
-extern void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg );
+extern int overlapmember( int *mem1, int *mem2 );
+//extern void profilealignment( int n0, int n1, int n2, char **aln0, char **aln1, char **aln2, int alloclen, char alg );
+extern void profilealignment2( int n0, int n2, char **aln0, char **aln2, int alloclen, char alg );
+extern void sreverse( char *r, char *s );
+extern int addonetip( int njobc, int ***topolc, double **lenc, double **iscorec, int ***topol, double **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign );
+extern void intcpy( int *s1, int *s2 );
+extern void intncpy( int *s1, int *s2, int n );
+extern void fltncpy( double *s1, double *s2, int n );
+extern void intcat( int *s1, int *s2 );
+extern void readsubalignmentstable( int n, int **table, int *preservegaps, int *nsubpt, int *maxmempt );
+extern int myatoi( char * );
+extern double myatof( char * );
+extern void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth );
+extern void gapcountf( double *freq, char **seq, int nseq, double *eff, int lgth );
+extern void gapcountadd( double *freq, char **seq, int nseq, double *eff, int lgth );
+extern void outgapcount( double *freq, int nseq, char *gappat, double *eff );
+extern void makedynamicmtx( double **out, double **in, double offset );
+extern double dist2offset( double dist );
+extern void reporterr( const char *str, ... );
+extern void freeconstants();
+extern void closeFiles();
+extern void FreeCommonIP();
+extern void initglobalvariables();
+extern void makeskiptable( int n, int **skip, char **seq );
+extern int generatesubalignmentstable( int nseq, int ***tablept, int *nsubpt, int *maxmempt, int ***topol, double **len, double threshold );
+extern double sumofpairsscore( int nseq, char **seq );
+//extern int maskoriginalgaps( char *repseq, char *originallygapped );
+
+//extern void restoregaponlysites( char *originallygapped, int n1, int n2, char **s1, char **s2, int rep );
+extern int isallgap( char * );
+extern int deletenewinsertions_whole( int on, int an, char **oseq, char **aseq, int **deletelist );
+extern int deletenewinsertions_whole_eq( int on, int an, char **oseq, char **aseq, int **deletelist );
+extern int recordoriginalgaps( char *originallygapped, int n, char **s );
+extern void restoreoriginalgaps( int n, char **seq, char *originalgaps );
+extern void reconstructdeletemap( int nadd, char ** addbk, int **deletelist, char **realn, FILE *fp, char **name );
+extern double D__align( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp );
+extern double D__align_ls( double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp );
+extern double D__align_variousdist( int **whichmtx, double ***matrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres, int headgp, int tailgp );
+extern double D__align_gapmap( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int *gapmap1, int *gapmap2 );
+extern void stringshuffle( int *ary, int size );
+extern void topolorder( int n, int *order, int *posinorder, int ***topol, Treedep *dep, int pos, int child );
+extern int commonsextet_p( int *table, int *pointt );
+extern void compacttree_memsaveselectable( int nseq, double **partmtx, int *nearest, double *mindist, int **pointt, int *selfscore, char **seq, int **skiptable, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int howcompact, int memsave );
+extern double distcompact( int len1, int len2, int *table1, int *point2, int ss1, int ss2 );
+extern double distcompact_msa( char *seq1, char *seq2, int *skiptable1, int *skiptable2, int ss1, int ss2 );
+extern void fillimp( double **impmtx, double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 );
+extern int pairlocalalign( int ngui, int lgui, char **namegui, char **seqgui, double **distancemtx, LocalHom **localhomtable, int argc, char **argv );
+extern char creverse( char f );
+#ifndef mingw
+extern void setstacksize( rlim_t );
+#endif
static TLS int localstop;
#if 1
-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
+static void match_calc_mtx( double **mtx, double *match, char **s1, char **s2, int i1, int lgth2 ) /* Fills match[0..lgth2-1] with mtx[ s1[0][i1] ][ s2[0][j] ], j = 0..lgth2-1; only the first sequence of s1 and of s2 is read. */
{
- char tmpc = s1[0][i1];
char *seq2 = s2[0];
+ double *doubleptr = mtx[(unsigned char)s1[0][i1]]; /* row of mtx selected once, outside the loop; the char is cast to unsigned char before being used as an index */
while( lgth2-- )
- *match++ = amino_dis[(int)tmpc][(int)*seq2++];
+ *match++ = doubleptr[(unsigned char)*seq2++]; /* column index is likewise cast to unsigned char; advances both match and seq2 */
}
#else
-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
+static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 )
{
int j;
}
#endif
-#if 0
-static void match_calc_bk( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
-{
- int j, k, l;
- float scarr[26];
- float **cpmxpd = floatwork;
- int **cpmxpdn = intwork;
- int count = 0;
-
- if( initialize )
- {
- for( j=0; j<lgth2; j++ )
- {
- count = 0;
- for( l=0; l<26; l++ )
- {
- if( cpmx2[l][j] )
- {
- cpmxpd[count][j] = cpmx2[l][j];
- cpmxpdn[count][j] = l;
- count++;
- }
- }
- cpmxpdn[count][j] = -1;
- }
- }
-
- for( l=0; l<26; l++ )
- {
- scarr[l] = 0.0;
- for( k=0; k<26; k++ )
- scarr[l] += n_dis[k][l] * cpmx1[k][i1];
- }
-#if 0
- {
- float *fpt, **fptpt, *fpt2;
- int *ipt, **iptpt;
- fpt2 = match;
- iptpt = cpmxpdn;
- fptpt = cpmxpd;
- while( lgth2-- )
- {
- *fpt2 = 0.0;
- ipt=*iptpt,fpt=*fptpt;
- while( *ipt > -1 )
- *fpt2 += scarr[*ipt++] * *fpt++;
- fpt2++,iptpt++,fptpt++;
- }
- }
-#else
- for( j=0; j<lgth2; j++ )
- {
- match[j] = 0.0;
- for( k=0; cpmxpdn[k][j]>-1; k++ )
- match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];
- }
-#endif
-}
-#endif
-
-static float gentracking( float *lasthorizontalw, float *lastverticalw,
+static double gentracking( double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
+ double **cpmx1, double **cpmx2,
int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj )
{
int i, j, l, iin, jin, lgth1, lgth2, k, limk;
}
-float genL__align11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt )
+double genL__align11( double **n_dynamicmtx, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
int lasti, lastj;
int lgth1, lgth2;
int resultlen;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
#if 1
- float *wtmp;
+ double *wtmp;
int *ijpipt;
int *ijpjpt;
- float *mjpt, *Mjpt, *prept, *curpt;
+ double *mjpt, *Mjpt, *prept, *curpt;
int *mpjpt, *Mpjpt;
#endif
- static TLS float mi, *m;
- static TLS float Mi, *largeM;
+ static TLS double mi, *m;
+ static TLS double Mi, *largeM;
static TLS int **ijpi;
static TLS int **ijpj;
static TLS int mpi, *mp;
static TLS int Mpi, *Mp;
- static TLS float *w1, *w2;
- static TLS float *match;
- static TLS float *initverticalw; /* kufuu sureba iranai */
- static TLS float *lastverticalw; /* kufuu sureba iranai */
+ static TLS double *w1, *w2;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
static TLS char **mseq1;
static TLS char **mseq2;
static TLS char **mseq;
- static TLS float **cpmx1;
- static TLS float **cpmx2;
+ static TLS double **cpmx1;
+ static TLS double **cpmx2;
static TLS int **intwork;
- static TLS float **floatwork;
+ static TLS double **doublework;
static TLS int orlgth1 = 0, orlgth2 = 0;
- float maxwm;
- float tbk;
+ static TLS double **amino_dynamicmtx = NULL; // ??
+ double maxwm;
+ double tbk;
int tbki, tbkj;
int endali, endalj;
-// float localthr = 0.0;
-// float localthr2 = 0.0;
- float fpenalty = (float)penalty;
- float fpenalty_OP = (float)penalty_OP;
- float fpenalty_ex = (float)penalty_ex;
-// float fpenalty_EX = (float)penalty_EX;
- float foffset = (float)offset;
- float localthr = -foffset;
- float localthr2 = -foffset;
+// double localthr = 0.0;
+// double localthr2 = 0.0;
+ double fpenalty = (double)penalty;
+ double fpenalty_OP = (double)penalty_OP;
+ double fpenalty_ex = (double)penalty_ex;
+// double fpenalty_EX = (double)penalty_EX;
+ double foffset = (double)offset;
+ double localthr = -foffset;
+ double localthr2 = -foffset;
if( seq1 == NULL )
{
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
+ if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL;
}
return( 0.0 );
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
+ if( amino_dynamicmtx ) FreeDoubleMtx( amino_dynamicmtx ); amino_dynamicmtx = NULL;
}
ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
mseq = AllocateCharMtx( njob, ll1+ll2 );
- cpmx1 = AllocateFloatMtx( 26, ll1+2 );
- cpmx2 = AllocateFloatMtx( 26, ll2+2 );
+ cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
+ cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
+
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+ amino_dynamicmtx = AllocateDoubleMtx( 0x100, 0x100 );
#if DEBUG
fprintf( stderr, "succeeded\n" );
orlgth2 = ll2 - 100;
}
+ for( i=0; i<nalphabets; i++) for( j=0; j<nalphabets; j++ )
+ amino_dynamicmtx[(unsigned char)amino[i]][(unsigned char)amino[j]] = (double)n_dynamicmtx[i][j];
mseq1[0] = mseq[0];
mseq2[0] = mseq[1];
currentw = w1;
previousw = w2;
- match_calc( initverticalw, seq2, seq1, 0, lgth1 );
+ match_calc_mtx( amino_dynamicmtx, initverticalw, seq2, seq1, 0, lgth1 );
- match_calc( currentw, seq1, seq2, 0, lgth2 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, 0, lgth2 );
lasti = lgth2+1;
previousw[0] = initverticalw[i-1];
- match_calc( currentw, seq1, seq2, i, lgth2 );
+ match_calc_mtx( amino_dynamicmtx, currentw, seq1, seq2, i, lgth2 );
#if DEBUG2
fprintf( stderr, "%c ", seq1[0][i] );
fprintf( stderr, "%5.0f ", currentw[0] );
int i, j, l;
int clus1, clus2;
int s1, s2, r1, r2;
- float pscore;
+ double pscore;
static char *indication1, *indication2;
static char **name1, **name2;
static double **partialmtx = NULL;
fprintf( stderr, "\n" );
*/
- pscore = Fgetlag( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
+ pscore = Fgetlag( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
for( i=0; (r2=topol[l][1][i])>-1; i++ )
{
--- /dev/null
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "mafft.h"
+
+int report( int status, int progress, char *message )
+{
+    /*
+     * Sample progress callback handed to disttbfast() by main().
+     * Dumps the three fields to stderr and returns 0.
+     * NOTE(review): 0 presumably tells the caller to keep going -- confirm
+     * against the disttbfast() callback contract.
+     */
+    fprintf( stderr,
+             "status = %d\nprogress = %d / 100\nmessage = %s\n",
+             status, progress, message );
+    return 0;
+}
+
+int chudan( int status, int progress, char *message )
+{
+    /*
+     * Second sample callback used by main(): prints the same three fields
+     * as report() but returns 2 instead of 0.
+     * NOTE(review): 2 presumably asks disttbfast() to interrupt the run
+     * -- confirm against the callback contract.
+     */
+    fprintf( stderr,
+             "status = %d\nprogress = %d / 100\nmessage = %s\n",
+             status, progress, message );
+    return 2;
+}
+int main( void ) /* Test driver for the disttbfast() entry point: fills
+   in-memory name/sequence tables and a synthetic argv, calls disttbfast()
+   twice (once per sample callback), then prints seq[] and argv[]. */
+{
+ int i;
+ int argc; // length of the synthetic argument vector built further down
+ char **argv; // synthetic argument vector handed to disttbfast()
+ char **seq; // n sequence buffers, each l+1 bytes
+ char **name; // n name buffers, each 100 bytes
+ char *message; // referenced only inside the disabled (#if 0) call variant
+ int res; // return value of the most recent disttbfast() call
+ int n, l, mlen; // n: number of records, l: per-sequence buffer length; mlen is used only in the #if 0 variant
+
+// printf( "This is interface.\n" );
+
+
+ l = 10000;
+ n = 130; // 10 repetitions of the 13 records copied in below
+ seq = (char **)calloc( n, sizeof( char * ) ); // allocation results are not checked
+ name = (char **)calloc( n, sizeof( char * ) );
+ for( i=0; i<n; i++ ) seq[i] = calloc( l+1, sizeof( char ) );
+ for( i=0; i<n; i++ ) name[i] = calloc( 100, sizeof( char ) );
+
+ for( i=0; i<10; i++ )
+ {
+ strcpy( name[i*13+0], "name0" );
+ strcpy( name[i*13+1], "name1" );
+ strcpy( name[i*13+2], "name2" );
+ strcpy( name[i*13+3], "name3" );
+ strcpy( name[i*13+4], "name4" );
+ strcpy( name[i*13+5], "name5" );
+ strcpy( name[i*13+6], "name6" );
+ strcpy( name[i*13+7], "name7" );
+ strcpy( name[i*13+8], "name8" );
+ strcpy( name[i*13+9], "name9" );
+ strcpy( name[i*13+10], "name10" );
+ strcpy( name[i*13+11], "name11" );
+ strcpy( name[i*13+12], "name12" );
+
+
+ strcpy( seq[i*13+0], "TAATTAAAGGGCCGTGGTATACTGACCATGCGAAGGTAGCATAATCATTAGCCTTTTGATTTGAGGCTGGAATGAATGGTTTGACGAGAGATGGTCTGTCTCTTCGATTAAATTGAAGTTAATCTTTAAGTGAAAAAGCTTAAATGTACTTGGAGGGCGATAAGACCCTATAGATCTTTACATTTAATTCTTTTGTCTTGCGGTAGGTAATTAGACAGAGTAAAACAATGTTCGGTTGGGGCGACGGTAAGAACAGAATAAACACTTACAACATAAACACATCAATAAATGACCATTGATCCTTAGATGAATAAAGACCAAGTTACCTTAGGGATAACAGCGTAATTCTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-------------------" );
+ strcpy( seq[i*13+1], "GTGTTGAAGGGCCGCGGTATTTTTGACCGTGCGAAGGTAGCATAATCATTAGTCTTTTAATTGAAGGCTCGAATGAATGGTTGGACGAAAAACACACTGTTTCAGACAAAGAATTTGAATTTAACTTTTAAGTGAAAAGGCTTAAATGAATTAAAAAGACGATAAGACCCTATAAATCTTTACATTATGTTTTGTATTTTTAAAGTTGTTGTGTATTAAAAAGAAATATAAAGTAGATGTTGTGTTGGGGCAACGATAATATAATAAGATTAGCTGTTGGTAAAATTAAACAAATGTGTTTGTTAGGAGTAGTTAGATCCTTTTTAGAGATTTAAAGATTAAGTTACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCTTATCGAAGAAAAAGATTGCGACCTCGATG" );
+ strcpy( seq[i*13+2], "ATATTGAAGGGCCGCGGTATTTCTGACCGTGCGAAGGTAGCATAATCATTAGTCTTTTAATTGGAGGCTCGTATGAAAGGTCGAACGAGTGATAGACTGTCTCAGCAAAAAAAAAATTGAACTTAACTTTAAAGTGAAAAGGCTTTAATATTTCAGAAAGACGATAAGACCCTATAAATCTTCACACCACCTTTTATACTAAGCCAATCTGTTTGTATAAGGAGAAGTATAAAAAACGTGTTTTGTTGGGGCAACAAAGATATAATTAAATAACTGTTTTAATTTTAAAACAAAAATTTTTGAAAATAAATTGATCCTCTCTAAAGATTAAAAGATCAAGTTACTTTAGGGATAACAGCGTCATTTTTCTTGAGAGTTCCCATCGAAAGAAAAGTTTGCGACCTCGATG---" );
+ strcpy( seq[i*13+3], "AAATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATAAAGGCTTGAATGAAAGGTTGGACAAAGTATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTTAAGTGAAAAGGCTTAAATAAATCAAAAAGACGATAAGACCCTATAAATCTTTACAATAAATATATTTTATATTTTAGTTTATAAGTGAATGATATATAAAAATATAGGTTTGTTGCGCTGGGGCGGCGTAGATATATAAATAAACTGTCTATAGTTTAAATACAATAATCATTGCTTAATATAAATTGATCCTTAAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG" );
+ strcpy( seq[i*13+4], "AAATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATAAAGGCTTGAATGAAAGGTTGGACAAAGTATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTTAAGTGAAAAGGCTTAAATAAATCAAAAAGACGATAAGACCCTATAAATCTTTACAATAAATATATTTTATATTTTAGCTTATAAGTGTATAAGAAATAAAAATATAGGTTTGTTGCGCTGGGGCGGCGTAGATATATAAATAAACTGTCTATAATTTAAATACAATAATCATTGCTTAATACAAATTGATCCTTAAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG" );
+ strcpy( seq[i*13+5], "ATATTAAAGAGCCGCAGTATTCTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATGAAGGCTTGAATGAAAGGTTGGACAAAGTATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTCAAGTGAAAAGGCTTGAATAAATTAAAAAGACGATAAGACCCTATAAATCTTTACAATAAACATGTTCTATATTTTAATTTATAATTGTATAAAATATTAAATTTGTAATTGTTTCGCTGGGGCGGCGTAGATATATAAATAAACTGTCTATTTATTAAATCCAGTAATTATTGATTAATAAAATTGATCCTTGAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG--" );
+ strcpy( seq[i*13+6], "ATATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATGAAGGCTTGAATGAAAGGTTGGACAAAGAATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTTAAGTGAAAAGGCTTAAATAAATTAAAAAGACGATAAGACCCTATAAATCTTAACAATAAATAGACTTTATATTTTAATTTATAAGTGTGTAAGGTATAAATTTTATAATTGTTTCGCTGGGGCGGCGTAGATATATAAATAAACTGTCTATATATTATATCCAATAATTGTTGATTAATAAAATTGATCCTTAAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG--" );
+ strcpy( seq[i*13+7], "GTATTAAAGAGCCGCAGTATTCTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATGGAGGCTTGAATGAAAGGTTGGACAAAGTATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTCAAGTGAAAAGGCTTGAATAAATTAAAAAGACGATAAGACCCTATAAATCTTTACAATAAACATGTTCTATATTTTAATTTATAAGTATATAAAATATTAAATTTGTATTTGTTTCGCTGGGGCGGCGTGGATATATAAATAAACTGTCTATGTGTTAAATCCAATAATTATTGATTAGTAAAATTGATCCTTAAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG--" );
+ strcpy( seq[i*13+8], "GTATTAAAGAGCCGCAGTATTCTGACTGTGCGAAGGTAGCATAATAAATTGTCTTTTAAATGGAGGCTTGAATGAAAGGTTGGACAAAGTATCATCTGTTTCTTAAATATTTATTGAATTTGACTTTCAAGTGAAAAGGCTTGAATAAATTAAAAAGACGATAAGACCCTATAAATCTTTACAATAAACATGTTCTATATTTTAATTTATAAGTGTATAAAGTATTAAATTTGTATTTGTTTCGCTGGGGCGGCGTGGATATATAAATAAACTGTCTATGTGTTAAATCCAATAATTATTGATTAATAAAATTGATCCTTAAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTTTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG--" );
+ strcpy( seq[i*13+9], "TGATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAGATTGTCTTTTAAATGGGGGCTAGAATGAATGGTTGGACAAAGTATCGTCTGTTTCTTAAATAAATATTGAATTTGACTTTTAAGTTAAAAGGCTTAAATAAAATAAAAAGACGATAAGACCCTATAAATCTTTACATAAATTATATTTTAAATTTTAAGTTATAATTTTATAAAATATAAAAGTATAGTTTTGTTGTGCTGGGGCGGCATAGATATATAAATAAACTGTCTATAAATTTGAATCAATGATTGTTGATTAATGTGGTTGATCCTTTAAGAGATTAGAAGATTAAGTTACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-" );
+ strcpy( seq[i*13+10], "TGATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAGATTGTCTTTTAAATGGGGGCTAGAATGAATGGTTGGACAAAGTATCATCTGTTTCTTAAATAAATATTGAATTTGACTTTTAAGTTAAAAGGCTTAAATAAAATAAAAAGACGATAAGACCCTATAAATCTTTACATAAATTATATTTTAAATTTTAATTTATAATTTTATAAAATATAAAAGTATAGTTTTGTTGTGCTGGGGCGGCATAGATATATAAATAAACTGTCTATAAATTTAAATCAATAGTTGTTGATTAATATAGTTGATCCTTTAAAAGATTAGAAGATTAAGTTACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-" );
+ strcpy( seq[i*13+11], "TAATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAGATTGGCTTTTAAATGGAGGCTGGAATGAATGGTTGGACAAAGTATCATCTGTTTCTTAAATAAATATTGAATTTGACTTTTAAGTTAAAAGGCTTAAATAAAATAAAAAGACGATAAGACCCTATAAATCTTTACATAAATTATATTTTAAATTTTAACTTATAATTTTATAAAATATAAAAGTATAATTTTGTTGTGCTGGGGCGGCGTAGATATATAAATGAACTGTCTATGAAATTAAATCAATAATAGTTGATTATTAATATTGATCCTTTAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-" );
+ strcpy( seq[i*13+12], "TAATTAAAGAGCCGCAGTATTTTGACTGTGCGAAGGTAGCATAATAGATTGGCTTTTAAATGGAGGCTGGAATGAATGGTTGGACAAAGTATCATCTGTTTCTTAAATAAATATTGAATTTGACTTTTAAGTTAAAAGGCTTAAATAAAATAAAAAGACGATAAGACCCTATAAATCTTTACATAAATTATATTTTAAATTTTAATTTATAATTTTATAAAATATAAAAGTATAATTTTGTTGTGCTGGGGCGGCGTAGATATATAAATGAACTGTCTATGAAATTAAATCAATAATAGTTGATTATTAATATTGATCCTTTAATAGATTAAAAGATTAAGATACTTTAGGGATAACAGCGTTATTTTTTCTGAGAGTTCAAATCGACAAAAGAGTTTGCGAGCCTCGATG-" );
+
+ }
+
+ argc = 20;
+ argv = (char **)calloc( argc, sizeof( char * ) );
+ for( i=0; i<argc; i++ ) argv[i] = calloc( 100, sizeof( char ) );
+ strcpy( argv[0], "disttbfast" );
+ strcpy( argv[1], "-W" );
+ strcpy( argv[2], "6" );
+ strcpy( argv[3], "-b" );
+ strcpy( argv[4], "62" );
+ strcpy( argv[5], "-Q" );
+ strcpy( argv[6], "100" );
+ strcpy( argv[7], "-h" );
+ strcpy( argv[8], "0" );
+ strcpy( argv[9], "-F" );
+ strcpy( argv[10], "-X" );
+ strcpy( argv[11], "-s" );
+ strcpy( argv[12], "0.0" );
+ strcpy( argv[13], "-f" );
+ strcpy( argv[14], "-1.53" );
+ strcpy( argv[15], "-C" );
+ strcpy( argv[16], "0" );
+ strcpy( argv[17], "-D" ); // Necessary. DNA -> -D; Protein -> -P
+ strcpy( argv[18], "-I" ); // --add
+ strcpy( argv[19], "2" ); // --add
+
+#if 0
+ mlen = 5000;
+ message = (char *)calloc( mlen+1, sizeof( char ) );
+
+ fprintf( stderr, "first run\n" );
+ res = disttbfast( n, l, mlen, name, seq, &message, argc, argv, report );
+ fprintf( stderr, "second run\n" );
+ res = disttbfast( n, l, mlen, name, seq, &message, argc, argv, report );
+ fprintf( stderr, "third run\n" );
+ res = disttbfast( n, l, mlen, name, seq, &message, argc, argv, report );
+
+ fprintf( stderr, "\n\n\nmessage in interface = :%s:\n", message );
+ free( message );
+#else
+ fprintf( stderr, "first run\n" );
+ res = disttbfast( n, l, name, seq, argc, argv, report );
+ fprintf( stderr, "second run\n" );
+ res = disttbfast( n, l, name, seq, argc, argv, chudan );
+// fprintf( stderr, "third run\n" );
+// res = disttbfast( n, l, name, seq, argc, argv, report );
+#endif
+
+ if( res == GUI_LENGTHOVER )
+ {
+ fprintf( stderr, "length over!" );
+ }
+ else
+ {
+ fprintf( stderr, "res = %d\n", res );
+ fprintf( stdout, "Output:\n" );
+ for( i=0; i<n; i++ )
+ fprintf( stdout, "%s\n", seq[i] );
+ }
+ fprintf( stderr, "argv = \n" );
+ for( i=0; i<argc; i++ )
+ fprintf( stderr, "%s ", argv[i] );
+ fprintf( stderr, "\n" );
+
+ for( i=0; i<n; i++ ) free( seq[i] );
+ free( seq );
+ for( i=0; i<n; i++ ) free( name[i] );
+ free( name );
+ for( i=0; i<argc; i++ ) free( argv[i] );
+ free( argv );
+
+}
char creverse( char f )
{
- static char *table = NULL;
+ static TLS char *table = NULL;
+
+ if( f == 0 )
+ {
+ free( table );
+ table = NULL;
+ return( 0 );
+ }
+
if( table == NULL )
{
+ int i;
table = AllocateCharVec(0x80);
+ for( i=0; i<0x80; i++ ) table[i] = i;
+ table['A'] = 'T';
+ table['C'] = 'G';
+ table['G'] = 'C';
+ table['T'] = 'A';
+ table['U'] = 'A';
+ table['M'] = 'K';
+ table['R'] = 'Y';
+ table['W'] = 'W';
+ table['S'] = 'S';
+ table['Y'] = 'R';
+ table['K'] = 'M';
+ table['V'] = 'B';
+ table['H'] = 'D';
+ table['D'] = 'H';
+ table['B'] = 'V';
+ table['N'] = 'N';
table['a'] = 't';
table['c'] = 'g';
table['g'] = 'c';
table['d'] = 'h';
table['b'] = 'v';
table['n'] = 'n';
- table['-'] = '-';
- table['.'] = '.';
- table['*'] = '*';
+// table['-'] = '-';
+// table['.'] = '.';
+// table['*'] = '*';
}
return( table[(int)f] );
}
-static int addlocalhom_r( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, int skip )
+static int addlocalhom_r( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, int skip, char korh )
{
int pos1, pos2, start1, start2, end1, end2;
char *pt1, *pt2;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
#if 1
sumscore += score;
start1 = pos1; start2 = pos2;
st = 1;
}
- score += (double)n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]];
+ score += (double)n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]];
// fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score );
}
if( *pt1++ != '-' ) pos1++;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
#if 1
sumscore += score;
}
return( nlocalhom );
}
-void putlocalhom3( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa )
+void putlocalhom3( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh )
{
int pos1, pos2, start1, start2, end1, end2;
char *pt1, *pt2;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
#if 1
if( divpairscore )
start1 = pos1; start2 = pos2;
st = 1;
}
- score += (double)n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â
+ score += (double)n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â
// fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score );
}
if( *pt1++ != '-' ) pos1++;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
#if 1
}
}
}
-void putlocalhom_ext( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa )
+void putlocalhom_ext( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh )
{
int pos1, pos2, start1, start2, end1, end2;
char *pt1, *pt2;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
#if 1
if( divpairscore )
start1 = pos1; start2 = pos2;
st = 1;
}
- iscore += n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â
+ iscore += n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â
// fprintf( stderr, "%c-%c, iscore(0) = %d\n", *pt1, *pt2, iscore );
}
if( *pt1++ != '-' ) pos1++;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
#if 1
if( divpairscore )
}
}
-void putlocalhom_str( char *al1, char *al2, double *equiv, double scale, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa )
+void putlocalhom_str( char *al1, char *al2, double *equiv, double scale, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh )
{
int posinaln, pos1, pos2, start1, start2, end1, end2;
char *pt1, *pt2;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
tmppt->overlapaa = 1;
// tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600;
}
}
-void putlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa )
+
+
+void putlocalhom2( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh )
{
int pos1, pos2, start1, start2, end1, end2;
char *pt1, *pt2;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
+ tmppt->nokori += 1;
+ localhompt->last = tmppt;
#if 1
if( divpairscore )
start1 = pos1; start2 = pos2;
st = 1;
}
- iscore += n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â
+ iscore += n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â
// fprintf( stderr, "%c-%c, iscore(0) = %d\n", *pt1, *pt2, iscore );
}
if( *pt1++ != '-' ) pos1++;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
+ tmppt->nokori += 1;
+ localhompt->last = tmppt;
#if 1
if( divpairscore )
}
}
}
-void putlocalhom( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa )
+
+#if 0
+void putlocalhom( char *al1, char *al2, LocalHom *localhompt, int off1, int off2, int opt, int overlapaa, char korh )
{
int pos1, pos2, start1, start2, end1, end2;
char *pt1, *pt2;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
#if 1
if( divpairscore )
start1 = pos1; start2 = pos2;
st = 1;
}
- score += (double)n_dis[(int)amino_n[(int)*pt1]][(int)amino_n[(int)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â
+ score += (double)n_dis[(int)amino_n[(unsigned char)*pt1]][(int)amino_n[(unsigned char)*pt2]]; // - offset ¤Ï¤¤¤é¤Ê¤¤¤«¤â
// fprintf( stderr, "%c-%c, score(0) = %f\n", *pt1, *pt2, score );
}
if( *pt1++ != '-' ) pos1++;
tmppt->start2 = start2;
tmppt->end1 = end1 ;
tmppt->end2 = end2 ;
+ tmppt->korh = korh ;
#if 1
if( divpairscore )
}
}
}
+#endif
char *cutal( char *al, int al_display_start, int start, int end )
{
else strncpy( str1, str2, len );
}
-void seqUpper( int nseq, char **seq ) /* not used */
+void seqUpper( int nseq, char **seq )
{
int i, j, len;
for( i=0; i<nseq; i++ )
return( 0 );
}
-float input_new( FILE *fp, int d )
+double input_new( FILE *fp, int d )
{
char mojiretsu[10];
int i, c;
ungetc( c, fp );
}
+#if 0
static int onlyGraph( char *str )
{
char tmp;
while( (tmp=*str++) )
{
if( 0x20 < tmp && tmp < 0x7f ) *res++ = tmp;
- if( tmp == '>' )
+ if( tmp == '>' || tmp == '(' )
{
fprintf( stderr, "========================================================\n" );
fprintf( stderr, "========================================================\n" );
fprintf( stderr, "=== \n" );
fprintf( stderr, "=== ERROR!! \n" );
- fprintf( stderr, "=== In the '--anysymbol' and '--preservecase' modes, \n" );
- fprintf( stderr, "=== '>' in sequence is unacceptable.\n" );
+// fprintf( stderr, "=== In the '--anysymbol' and '--preservecase' modes, \n" );
+ fprintf( stderr, "=== '>' and '(' are acceptable only in title lines.\n" );
fprintf( stderr, "=== \n" );
fprintf( stderr, "========================================================\n" );
fprintf( stderr, "========================================================\n" );
*res = 0;
return( res - bk );
}
+#endif
+
+static int charfilter( unsigned char *str )
+{
+    /*
+     * In-place filter for "--text" mode input.
+     *   - '=', '<' and '>' are rejected: a message is printed and the
+     *     process exits with status 1.
+     *   - LF (0x0a), space (0x20) and CR (0x0d) are dropped.
+     *   - every other byte, printable or not, is kept.
+     * Returns the length of the filtered, NUL-terminated string.
+     */
+    unsigned char *src = str;
+    unsigned char *dst = str;
+    unsigned char ch;
+
+    for( ch = *src++; ch != 0; ch = *src++ )
+    {
+        switch( ch )
+        {
+            case '=':
+            case '<':
+            case '>':
+                fprintf( stderr, "\n" );
+                fprintf( stderr, "Characters '= < >' are not accepted in the --text mode, \nalthough most printable characters are ok.\n" );
+                fprintf( stderr, "\n" );
+                exit( 1 );
+
+            case 0x0a:
+            case 0x20:
+            case 0x0d:
+                /* line breaks and blanks are squeezed out */
+                break;
+
+            default:
+                *dst++ = ch;
+                break;
+        }
+    }
+    *dst = 0;
+    return( dst - str );
+}
+
static int onlyAlpha_lower( char *str )
{
b = '\n';
while( ( c = getc( fpp ) ) != EOF &&
- !( ( c == '>' || c == '(' || c == EOF ) && b == '\n' ) )
+ !( ( c == '>' || c == EOF ) && b == '\n' ) )
{
*cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */
if( cbuf - val == size )
}
ungetc( c, fpp );
*cbuf = 0;
- onlyGraph( val );
+// onlyGraph( val );
+ charfilter( (unsigned char *) val );
// kake2hiku( val );
return( val );
}
b = '\n';
while( ( c = getc( fpp ) ) != EOF &&
- !( ( c == '>' || c == '(' || c == EOF ) && b == '\n' ) )
+ !( ( c == '>' || c == EOF ) && b == '\n' ) )
{
*cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */
if( cbuf - val == size )
}
ungetc( c, fpp );
*cbuf = 0;
- if( dorp == 'd' )
- onlyAlpha_lower( val );
+
+ if( nblosum == -2 )
+ {
+ charfilter( (unsigned char *) val );
+ }
else
- onlyAlpha_upper( val );
- kake2hiku( val );
+ {
+ if( dorp == 'd' )
+ onlyAlpha_lower( val );
+ else
+ onlyAlpha_upper( val );
+ kake2hiku( val );
+ }
return( val );
}
b = '\n';
while( ( c = getc( fpp ) ) != EOF && /* by T. Nishiyama */
- !( ( c == '>' || c == '(' || c == EOF ) && b == '\n' ) )
+ !( ( c == '>' || c == EOF ) && b == '\n' ) )
{
*cbuf++ = (char)c; /* Ť¹¤®¤Æ¤â¤·¤é¤Ê¤¤ */
b = c;
}
+int copydatafromgui( char **namegui, char **seqgui, char **name, int *nlen, char **seq ) /* Copy njob
+   caller-supplied names and sequences (namegui/seqgui) into the working arrays
+   name/seq, record each sequence length in nlen, and normalise letter case
+   according to dorp.  Returns 0 on success, 1 when dorp is neither 'd' nor 'p'. */
+{
+ int i;
+
+
+ for( i=0; i<njob; i++ )
+ {
+ name[i][0] = '='; // every stored name starts with '='
+ strncpy( name[i]+1, namegui[i], B-2 ); // at most B-2 name bytes after the '='
+ name[i][B-1] = 0; // force termination at the last byte of a B-byte slot
+
+ strcpy( seq[i], seqgui[i] ); // unbounded copy; assumes seq[i] is large enough -- TODO confirm at the call site
+ nlen[i] = strlen( seq[i] );
+ }
+ if( dorp == 'd' )
+ seqLower( njob, seq );
+ else if( dorp == 'p' )
+ seqUpper( njob, seq );
+ else
+ {
+ reporterr( "DNA or Protein?\n" );
+ return( 1 );
+ }
+#if 0
+ free( tmpseq );
+#endif
+ if( outnumber ) // optionally rewrite each name as "=_numo_s_<8-digit index>_numo_e_<rest>"
+ {
+ char *namebuf;
+ char *cptr;
+ namebuf = calloc( B+100, sizeof( char ) ); // result is not checked for NULL
+ for( i=0; i<njob; i++ )
+ {
+ namebuf[0] = '=';
+ cptr = strstr( name[i], "_numo_e_" ); // name already carries a tag?
+ if( cptr )
+ sprintf( namebuf+1, "_numo_s_%08d_numo_e_%s", i+1, cptr+8 ); // keep only the text after the old tag
+ else
+ sprintf( namebuf+1, "_numo_s_%08d_numo_e_%s", i+1, name[i]+1 ); // skip the leading '='
+ strncpy( name[i], namebuf, B ); // copy back, truncated to the B-byte slot
+ name[i][B-1] = 0;
+ }
+ free( namebuf );
+ }
+ return( 0 );
+}
+
void readData_pointer( FILE *fp, char **name, int *nlen, char **seq )
{
int i;
seqlen = strlen( tmpseq );
if( dorp == 'd' && upperCase != -1 ) seqLower( 1, &tmpseq );
- for( j=0; j<5; j++ )
+ if( outtable[i] == 2 )
{
- if( regtable[i][j*2] == -1 && regtable[i][j*2+1] == -1 ) continue;
-
- startpos = regtable[i][j*2];
- endpos = regtable[i][j*2+1];
-
- if( startpos > endpos )
- {
- endpos = regtable[i][j*2];
- startpos = regtable[i][j*2+1];
- }
-
- if( startpos < 0 ) startpos = 0;
- if( endpos < 0 ) endpos = 0;
- if( endpos >= seqlen ) endpos = seqlen-1;
- if( startpos >= seqlen ) startpos = seqlen-1;
-
+ startpos = 0;
+ endpos = seqlen-1;
outlen = endpos - startpos + 1;
- if( revtable[i][j] == 'f' )
- {
- fprintf( stderr, "startpos = %d\n", startpos );
- fprintf( stderr, "endpos = %d\n", endpos );
- fprintf( stderr, "outlen = %d\n", outlen );
- fprintf( stdout, "%.*s\n", outlen, tmpseq+startpos );
- }
- else
+ fprintf( stdout, "%.*s\n", outlen, tmpseq+startpos );
+ }
+ else
+ {
+ for( j=0; j<5; j++ )
{
- fs = AllocateCharVec( outlen+1 );
- rs = AllocateCharVec( outlen+1 );
-
- fs[outlen] = 0;
- strncpy( fs, tmpseq+startpos, outlen );
- sreverse( rs, fs );
- fprintf( stdout, "%s\n", rs );
- free( fs );
- free( rs );
+ if( regtable[i][j*2] == -1 && regtable[i][j*2+1] == -1 ) continue;
+
+ startpos = regtable[i][j*2];
+ endpos = regtable[i][j*2+1];
+
+ if( startpos > endpos )
+ {
+ endpos = regtable[i][j*2];
+ startpos = regtable[i][j*2+1];
+ }
+
+ if( startpos < 0 ) startpos = 0;
+ if( endpos < 0 ) endpos = 0;
+ if( endpos >= seqlen ) endpos = seqlen-1;
+ if( startpos >= seqlen ) startpos = seqlen-1;
+
+ outlen = endpos - startpos + 1;
+ if( revtable[i][j] == 'f' )
+ {
+ fprintf( stderr, "startpos = %d\n", startpos );
+ fprintf( stderr, "endpos = %d\n", endpos );
+ fprintf( stderr, "outlen = %d\n", outlen );
+ fprintf( stdout, "%.*s\n", outlen, tmpseq+startpos );
+ }
+ else
+ {
+ fs = AllocateCharVec( outlen+1 );
+ rs = AllocateCharVec( outlen+1 );
+
+ fs[outlen] = 0;
+ strncpy( fs, tmpseq+startpos, outlen );
+ sreverse( rs, fs );
+ fprintf( stdout, "%s\n", rs );
+ free( fs );
+ free( rs );
+ }
}
}
}
}
}
+int countATGCandN( char *s, int *countN, int *total )
+{
+    /*
+     * Scan the NUL-terminated string s, case-insensitively.
+     *   return value : number of letters that are a, t, g, c, u or n
+     *   *total       : number of alphabetic characters
+     *   *countN      : number of 'n' letters
+     * Both outputs are always written, including for an empty string.
+     * (The previous version left *countN untouched in that case, so the
+     * caller accumulated an indeterminate value.)
+     */
+    int nATGC = 0;
+    int nChar = 0;
+    int nN = 0;
+    int c;
+
+    for( ; *s; s++ )
+    {
+        /* <ctype.h> functions require an unsigned char value (or EOF) */
+        c = tolower( (unsigned char)*s );
+        if( !isalpha( c ) ) continue;
+
+        nChar++;
+        if( c == 'a' || c == 't' || c == 'g' || c == 'c' || c == 'u' || c == 'n' )
+            nATGC++;
+        if( c == 'n' )
+            nN++;
+    }
+
+    *total = nChar;
+    *countN = nN;
+    return( nATGC );
+}
+
int countATGC( char *s, int *total )
{
int nATGC;
if( *s == 0 )
{
- total = 0;
+ *total = 0;
return( 0 );
}
return( val );
}
+int countnormalletters( char *seq, char *ref )
+{
+    /* Count the characters of seq that also occur in the set ref. */
+    int hits = 0;
+    char *p;
+
+    for( p = seq; *p != '\0'; p++ )
+    {
+        if( strchr( ref, *p ) != NULL )
+            hits++;
+    }
+    return( hits );
+}
+
void getnumlen_casepreserve( FILE *fp, int *nlenminpt )
{
int total;
}
}
+void getnumlen_nogap_countn( FILE *fp, int *nlenminpt, double *nfreq )
+{
+    /*
+     * Scan all records of fp and set:
+     *   njob        - number of records (from countKUorWA)
+     *   nlenmax     - longest ungapped sequence length
+     *   *nlenminpt  - shortest ungapped sequence length
+     *   *nfreq      - number of 'n' letters divided by the a/t/g/c/u/n count
+     *   dorp/upperCase - chosen from the a/t/g/c/u/n fraction, but only
+     *                    when dorp is still NOTSPECIFIED
+     *
+     * Changes from the previous version:
+     *   - nN and nsite are reset before every countATGCandN() call, so an
+     *     empty sequence cannot feed a stale or uninitialised count into
+     *     the totals;
+     *   - both ratios are 0.0 when their denominator is zero instead of
+     *     the result of a 0/0 division.
+     */
+    int total = 0;
+    int atgcnum = 0;
+    int nnum = 0;
+    int nsite, nN;
+    int i, tmp;
+    char *tmpseq, *tmpname;
+    double atgcfreq;
+
+    tmpname = AllocateCharVec( N );
+    njob = countKUorWA( fp );
+    searchKUorWA( fp );
+    nlenmax = 0;
+    *nlenminpt = 99999999;
+
+    for( i=0; i<njob; i++ )
+    {
+        myfgets( tmpname, N-1, fp );
+        tmpseq = load1SeqWithoutName_realloc( fp );
+        tmp = countnogaplen( tmpseq );
+        if( tmp > nlenmax ) nlenmax = tmp;
+        if( tmp < *nlenminpt ) *nlenminpt = tmp;
+
+        nsite = 0;
+        nN = 0;
+        atgcnum += countATGCandN( tmpseq, &nN, &nsite );
+        total += nsite;
+        nnum += nN;
+        free( tmpseq );
+    }
+    free( tmpname );
+
+    atgcfreq = ( total > 0 ) ? (double)atgcnum / total : 0.0;
+    *nfreq = ( atgcnum > 0 ) ? (double)nnum / atgcnum : 0.0;
+//  fprintf( stderr, "##### nnum = %d\n", nnum );
+//  fprintf( stderr, "##### atgcfreq = %f, *nfreq = %f\n", atgcfreq, *nfreq );
+
+    if( dorp == NOTSPECIFIED )
+    {
+        /* more than 75% nucleotide letters: treat the input as DNA */
+        if( atgcfreq > 0.75 )
+        {
+            dorp = 'd';
+            upperCase = -1;
+        }
+        else
+        {
+            dorp = 'p';
+            upperCase = 0;
+        }
+    }
+}
void getnumlen_nogap( FILE *fp, int *nlenminpt )
{
int total;
}
free( tmpname );
atgcfreq = (double)atgcnum / total;
- fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq );
+// fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq );
if( dorp == NOTSPECIFIED )
{
if( atgcfreq > 0.75 )
}
free( tmpname );
atgcfreq = (double)atgcnum / total;
- fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq );
+// fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq );
if( dorp == NOTSPECIFIED )
{
if( atgcfreq > 0.75 )
}
}
+static void escapehtml( char *res, char *ori, int maxlen )
+{
+    /*
+     * Copy ori into res, replacing HTML-significant characters:
+     *   '<' -> "&lt;"   '>' -> "&gt;"   '&' -> "&amp;"
+     *   '"' -> "&quot;" ' ' -> "&nbsp;"
+     * res is always NUL-terminated.  maxlen is the size the caller
+     * reserved for the output; copying stops early (truncating the
+     * result) once another entity might not fit.
+     *
+     * The guard keeps 8 bytes free before each step: 6 for the longest
+     * entity, 1 for the terminator, and 1 of slack because the visible
+     * caller passes the full allocation size while writing from offset 1.
+     * The previous check compared against N after the write and only
+     * fired once the buffer had already been overrun.
+     */
+    char *res0 = res;
+    const char *entity;
+
+    while( *ori )
+    {
+        if( res - res0 + 8 > maxlen ) break;
+
+        switch( *ori )
+        {
+            case '<':  entity = "&lt;";   break;
+            case '>':  entity = "&gt;";   break;
+            case '&':  entity = "&amp;";  break;
+            case '"':  entity = "&quot;"; break;
+            case ' ':  entity = "&nbsp;"; break;
+            default:   entity = NULL;     break;
+        }
+
+        if( entity )
+        {
+            strcpy( res, entity );
+            res += strlen( entity );
+        }
+        else
+        {
+            *res++ = *ori;
+        }
+        ori++;
+    }
+    *res = 0;
+}
+
void getnumlen_nogap_outallreg_web( FILE *fp, FILE *ofp, int *nlenminpt, int *isalignedpt )
{
int total;
int nsite = 0;
int atgcnum;
int alnlen = 0, alnlen_prev;
- int i, tmp;
- char *tmpseq, *tmpname;
+ int i, tmp, lennormalchar;
+ char *tmpseq, *tmpname, *tmpname2;
double atgcfreq;
tmpname = AllocateCharVec( N );
+ tmpname2 = AllocateCharVec( N );
njob = countKUorWA( fp );
searchKUorWA( fp );
nlenmax = 0;
for( i=0; i<njob; i++ )
{
myfgets( tmpname, N-1, fp );
+ tmpname2[0] = tmpname[0];
+ escapehtml( tmpname2+1, tmpname+1, N );
// fprintf( stdout, "%s\n", tmpname );
+// fprintf( stdout, "%s\n", tmpname2 );
+// exit(1);
tmpseq = load1SeqWithoutName_realloc_casepreserve( fp );
tmp = countnogaplen( tmpseq );
// fprintf( stdout, "%d\n", tmp );
if( i>0 && alnlen_prev != alnlen ) *isalignedpt = 0;
alnlen_prev = alnlen;
- free( tmpseq );
atgcfreq = (double)atgcnum / total;
// fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq );
// if( dorp == NOTSPECIFIED ) // you kentou
}
}
+ if( dorp == 'd' ) lennormalchar = countnormalletters( tmpseq, "atgcuATGCU" );
+ else lennormalchar = countnormalletters( tmpseq, "ARNDCQEGHILKMFPSTWYVarndcqeghilkmfpstwyv" );
+ free( tmpseq );
-
-
- fprintf( ofp, " <label for='s%d'><input type='checkbox' id='s%d' name='s%d' checked>%s</label>\n", i, i, i, tmpname );
+ fprintf( ofp, " <label for='s%d'><span id='ss%d'><input type='checkbox' id='s%d' name='s%d' checked></span> <input type='text' class='ll' id='ll%d' style='display:none' size='6' value='%d' readonly='readonly'>%s</label>\n", i, i, i, i, i, lennormalchar, tmpname2 );
fprintf( ofp, "<span id='t%d-0' style='display:none'>", i );
fprintf( ofp, " <a href='javascript:void(0)' onclick='ddcycle(this.form,\"t%d\")'>+reg</a>", i );
- fprintf( ofp, " Begin:<input type='text' name='b%d-0' size='8' value='1'> End:<input type='text' name='e%d-0' size='8' value='%d'>", i, i, tmp );
+ fprintf( ofp, " Begin:<input type='text' name='b%d-0' size='8' value='1' class='ie'> End:<input type='text' name='e%d-0' size='8' value='%d' class='ie'>", i, i, tmp );
if( dorp == 'd' ) fprintf( ofp, " <label for='r%d-0'><input type='checkbox' name='r%d-0' id='r%d-0'>Reverse</label>", i, i, i );
- fprintf( ofp, " Sequence Length:<input type='text' name='l%d' size='8' value='%d' readonly='readonly'>", i, tmp );
+// fprintf( ofp, " Sequence Length:<input type='text' name='l%d' size='8' value='%d' readonly='readonly'>", i, tmp );
fprintf( ofp, "\n</span>" );
fprintf( ofp, "<span id='t%d-1' style='display:none'>", i );
- fprintf( ofp, " Begin:<input type='text' name='b%d-1' size='8' value=''> End:<input type='text' name='e%d-1' size='8' value=''>", i, i );
+ fprintf( ofp, " Begin:<input type='text' name='b%d-1' size='8' value='' class='ie'> End:<input type='text' name='e%d-1' size='8' value='' class='ie'>", i, i );
if( dorp == 'd' ) fprintf( ofp, " <label for='r%d-1'><input type='checkbox' name='r%d-1' id='r%d-1'>Reverse</label>", i, i, i );
fprintf( ofp, "\n</span>" );
fprintf( ofp, "<span id='t%d-2' style='display:none'>", i );
- fprintf( ofp, " Begin:<input type='text' name='b%d-2' size='8' value=''> End:<input type='text' name='e%d-2' size='8' value=''>", i, i );
+ fprintf( ofp, " Begin:<input type='text' name='b%d-2' size='8' value='' class='ie'> End:<input type='text' name='e%d-2' size='8' value='' class='ie'>", i, i );
if( dorp == 'd' ) fprintf( ofp, " <label for='r%d-2'><input type='checkbox' name='r%d-2' id='r%d-2'>Reverse</label>", i, i, i );
fprintf( ofp, "\n</span>" );
fprintf( ofp, "<span id='t%d-3' style='display:none'>", i );
- fprintf( ofp, " Begin:<input type='text' name='b%d-3' size='8' value=''> End:<input type='text' name='e%d-3' size='8' value=''>", i, i );
+ fprintf( ofp, " Begin:<input type='text' name='b%d-3' size='8' value='' class='ie'> End:<input type='text' name='e%d-3' size='8' value='' class='ie'>", i, i );
if( dorp == 'd' ) fprintf( ofp, " <label for='r%d-3'><input type='checkbox' name='r%d-3' id='r%d-3'>Reverse</label>", i, i, i );
fprintf( ofp, "\n</span>" );
fprintf( ofp, "<span id='t%d-4' style='display:none'>", i );
- fprintf( ofp, " Begin:<input type='text' name='b%d-4' size='8' value=''> End:<input type='text' name='e%d-4' size='8' value=''>", i, i );
+ fprintf( ofp, " Begin:<input type='text' name='b%d-4' size='8' value='' class='ie'> End:<input type='text' name='e%d-4' size='8' value='' class='ie'>", i, i );
if( dorp == 'd' ) fprintf( ofp, " <label for='r%d-4'><input type='checkbox' name='r%d-4' id='r%d-4'>Reverse</label>", i, i, i );
fprintf( ofp, "\n</span>" );
}
free( tmpname );
+ free( tmpname2 );
atgcfreq = (double)atgcnum / total;
fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq );
// if( dorp == NOTSPECIFIED ) // you kentou
upperCase = 0;
}
}
+ fprintf( ofp, "\n" );
if( *isalignedpt )
{
- fprintf( ofp, "\n" );
fprintf( ofp, "<span id='tall-0' style='display:none'>" );
fprintf( ofp, "Cut the alignment\n" );
fprintf( ofp, " <a href='javascript:void(0)' onclick='ddcycle(this.form,\"tall\")'>+reg</a>" );
free( tmpseq );
}
+
atgcfreq = (double)atgcnum / total;
// fprintf( stderr, "##### atgcfreq = %f\n", atgcfreq );
if( dorp == NOTSPECIFIED )
fprintf( fp, "%.*s\n", C, aseq+j );
}
+void readhat2_doublehalf_part_pointer( FILE *fp, int nseq, int nadd, char **name, double **mtx ) /* Read a
+   "partial" hat2 distance file from fp: three header lines, nseq name lines, then
+   (nseq-nadd) x nadd values stored as mtx[i][j].  The layout matches what
+   WriteHat2_part_pointer() emits.  name is referenced only by the disabled check below. */
+{
+ int i, j, nseq0, norg;
+ char b[B];
+ fgets( b, B, fp ); // header line 1: read and ignored
+ fgets( b, B, fp ); b[5] = 0; nseq0 = atoi( b ); if( nseq != nseq0 ) // header line 2: sequence count in the first 5 columns
+ {
+ fprintf( stderr, "%d != %d\n", nseq, nseq0 );
+ ErrorExit( "hat2 is wrong." );
+ }
+ fgets( b, B, fp ); // header line 3: read and ignored
+ for( i=0; i<nseq; i++ ) // consume the nseq name lines; their content is not used
+ {
+#if 0
+ getaline_fp_eof( b, B, fp );
+#else
+ myfgets( b, B-2, fp );
+#endif
+#if 0
+ j = MIN( strlen( b+6 ), 10 );
+ if( strncmp( name[i], b+6 , j ) )
+ {
+ fprintf( stderr, "Error in hat2\n" );
+ fprintf( stderr, "%s != %s\n", b, name[i] );
+ exit( 1 );
+ }
+#endif
+ }
+ norg = nseq-nadd; // rows: the sequences other than the nadd columns
+ for( i=0; i<norg; i++ ) for( j=0; j<nadd; j++ )
+ {
+ mtx[i][j] = ( input_new( fp, D ) ); // one value per call; D is presumably the field width -- confirm in input_new
+ }
+}
-void readhat2_floathalf_pointer( FILE *fp, int nseq, char **name, float **mtx )
+void readhat2_doublehalf_pointer( FILE *fp, int nseq, char **name, double **mtx )
{
int i, j, nseq0;
char b[B];
fgets( b, B, fp );
- fgets( b, B, fp ); b[5] = 0; nseq0 = atoi( b ); if( nseq != nseq0 ) ErrorExit( "hat2 is wrong." );
+ fgets( b, B, fp ); b[5] = 0; nseq0 = atoi( b ); if( nseq != nseq0 )
+ {
+ fprintf( stderr, "%d != %d\n", nseq, nseq0 );
+ ErrorExit( "hat2 is wrong." );
+ }
fgets( b, B, fp );
for( i=0; i<nseq; i++ )
{
mtx[i][j-i] = ( input_new( fp, D ) );
}
}
-void readhat2_floathalf( FILE *fp, int nseq, char name[M][B], float **mtx )
+void readhat2_doublehalf( FILE *fp, int nseq, char name[M][B], double **mtx )
{
int i, j, nseq0;
char b[B];
mtx[i][j-i] = ( input_new( fp, D ) );
}
}
-void readhat2_float( FILE *fp, int nseq, char name[M][B], float **mtx )
+void readhat2_double( FILE *fp, int nseq, char name[M][B], double **mtx )
{
int i, j, nseq0;
char b[B];
}
}
-void WriteFloatHat2_pointer_halfmtx( FILE *hat2p, int locnjob, char **name, float **mtx )
+void WriteFloatHat2_pointer_halfmtx( FILE *hat2p, int locnjob, char **name, double **mtx )
{
int i, j, ijsa;
double max = 0.0;
{
for( j=i+1; j<njob; j++ )
{
- fprintf( hat2p, "%#6.3f", mtx[i][j-i] );
+ fprintf( hat2p, DFORMAT, mtx[i][j-i] );
ijsa = j-i;
if( ijsa % 12 == 0 || ijsa == locnjob-i-1 ) fprintf( hat2p, "\n" );
}
}
}
-void WriteFloatHat2_pointer( FILE *hat2p, int locnjob, char **name, float **mtx )
+void WriteFloatHat2_pointer( FILE *hat2p, int locnjob, char **name, double **mtx )
{
int i, j;
double max = 0.0;
{
for( j=1; j<locnjob-i; j++ )
{
- fprintf( hat2p, "%#6.3f", mtx[i][j] );
+ fprintf( hat2p, DFORMAT, mtx[i][j] );
if( j % 12 == 0 || j == locnjob-i-1 ) fprintf( hat2p, "\n" );
}
}
}
-void WriteFloatHat2( FILE *hat2p, int locnjob, char name[M][B], float **mtx )
+void WriteFloatHat2( FILE *hat2p, int locnjob, char name[M][B], double **mtx )
{
int i, j;
double max = 0.0;
{
for( j=1; j<locnjob-i; j++ )
{
- fprintf( hat2p, "%#6.3f", mtx[i][j] );
+ fprintf( hat2p, DFORMAT, mtx[i][j] );
if( j % 12 == 0 || j == locnjob-i-1 ) fprintf( hat2p, "\n" );
}
}
{
for( j=i+1; j<locnjob; j++ )
{
- fprintf( hat2p, "%#6.3f", (float)mtx[i][j] / INTMTXSCALE );
+ fprintf( hat2p, DFORMAT, (double)mtx[i][j] / INTMTXSCALE );
if( (j-i) % 12 == 0 || j == locnjob-1 ) fprintf( hat2p, "\n" );
}
}
}
+
+/*
+ * Write the (original x added) part of a distance matrix in hat2 layout.
+ *
+ * hat2p   : output stream.
+ * locnjob : total number of sequences (originals followed by added ones).
+ * nadd    : number of added sequences; locnjob-nadd rows are written.
+ * name    : sequence names, one numbered header line each.
+ * mtx     : values, read as mtx[i][j] for i in [0,locnjob-nadd), j in [0,nadd).
+ *
+ * Values are written with DFORMAT, at most 12 per line, each row ending with
+ * a newline.
+ */
+void WriteHat2_part_pointer( FILE *hat2p, int locnjob, int nadd, char **name, double **mtx )
+{
+	int row, col, seqno;
+	const int norg = locnjob - nadd;
+	double max = 0.0;	// never updated here, so the third header line prints 0
+
+	fprintf( hat2p, "%5d\n", 1 );
+	fprintf( hat2p, "%5d\n", locnjob );
+	fprintf( hat2p, " %#6.3f\n", max * 2.5 );
+
+	seqno = 0;
+	while( seqno < locnjob )
+	{
+		fprintf( hat2p, "%4d. %s\n", seqno+1, name[seqno] );
+		seqno++;
+	}
+
+	for( row=0; row<norg; row++ )
+	{
+		// col is 1-based so the line-break test reads naturally
+		for( col=1; col<=nadd; col++ )
+		{
+			fprintf( hat2p, DFORMAT, mtx[row][col-1] );
+			if( col % 12 == 0 || col == nadd ) fprintf( hat2p, "\n" );
+		}
+	}
+}
+
void WriteHat2_pointer( FILE *hat2p, int locnjob, char **name, double **mtx )
{
int i, j;
{
for( j=i+1; j<locnjob; j++ )
{
- fprintf( hat2p, "%#6.3f", mtx[i][j] );
+ fprintf( hat2p, DFORMAT, mtx[i][j] );
if( (j-i) % 12 == 0 || j == locnjob-1 ) fprintf( hat2p, "\n" );
}
}
}
+
void WriteHat2( FILE *hat2p, int locnjob, char name[M][B], double **mtx )
{
int i, j;
{
for( j=i+1; j<locnjob; j++ )
{
- fprintf( hat2p, "%#6.3f", mtx[i][j] );
+ fprintf( hat2p, DFORMAT, mtx[i][j] );
if( (j-i) % 12 == 0 || j == locnjob-1 ) fprintf( hat2p, "\n" );
}
}
return count;
}
-int ReadBlastm7( FILE *fp, double *dis, int qmem, char name[M][B], LocalHom *localhomlist )
+int ReadBlastm7( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist )
{
int count=0;
char b[B];
// fprintf( stderr, "t=%d, score = %f, qstart=%d, qend=%d, tstart=%d, tend=%d, overlapaa=%d\n", junban[count], score, qstart, qend, tstart, tend, overlapaa );
- nlocalhom += addlocalhom_r( qal, tal, localhomlist+junban[count], qstart, tstart, score, overlapaa, nlocalhom );
+ nlocalhom += addlocalhom_r( qal, tal, localhomlist+junban[count], qstart, tstart, score, overlapaa, nlocalhom, 'h' );
while( fgets( b, B-1, fp ) )
if( !strncmp( " </Hsp>:", b, 18 ) ) break;
return count;
}
-int ReadFasta34noalign( FILE *fp, double *dis, int qmem, char name[M][B], LocalHom *localhomlist )
+int ReadFasta34noalign( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist )
{
int count=0;
char b[B];
return count;
}
-int ReadFasta34m10_nuc( FILE *fp, double *dis, int qmem, char name[M][B], LocalHom *localhomlist )
+int ReadFasta34m10_nuc( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist )
{
int count=0;
char b[B];
// fprintf( stderr, "tal2 = %s\n", tal2 );
// fprintf( stderr, "putting %d - %d, opt = %d\n", qmem, junban[count-1], opt );
- putlocalhom( qal2, tal2, localhomlist+junban[count-1], qstart, tstart, opt, overlapaa );
+ putlocalhom2( qal2, tal2, localhomlist+junban[count-1], qstart, tstart, opt, overlapaa, 'h' );
}
// fprintf( stderr, "count = %d\n", count );
return count;
}
-int ReadFasta34m10( FILE *fp, double *dis, int qmem, char name[M][B], LocalHom *localhomlist )
+int ReadFasta34m10( FILE *fp, double *dis, int qmem, char **name, LocalHom *localhomlist )
{
int count=0;
char b[B];
// fprintf( stderr, "tal2 = %s\n", tal2 );
// fprintf( stderr, "putting %d - %d, opt = %d\n", qmem, junban[count-1], opt );
- putlocalhom( qal2, tal2, localhomlist+junban[count-1], qstart, tstart, opt, overlapaa );
+ putlocalhom2( qal2, tal2, localhomlist+junban[count-1], qstart, tstart, opt, overlapaa, 'h' );
}
// fprintf( stderr, "count = %d\n", count );
return count;
fflush( trap_g );
}
+// Close the two global streams prep_g and trap_g.
+// Neither handle is checked for NULL and the fclose() results are ignored.
+void closeFiles( void )
+{
+ fclose( prep_g );
+ fclose( trap_g );
+}
-void WriteForFasta( FILE *fp, int locnjob, char name[][B], int nlen[M], char **aseq )
+
+void WriteForFasta( FILE *fp, int locnjob, char **name, int nlen[M], char **aseq )
{
static char b[N];
int i, j;
}
}
-void readlocalhomtable2( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec )
+/*
+ * Read local-homology (hat3) records into a target-indexed table.
+ *
+ * Each input line carries: i j overlapaa opt start1 end1 start2 end2 [tag].
+ * The record is stored at localhomtable[targetmap[i]][j] and, when
+ * targetmap[j] is not -1, mirrored with the two regions swapped at
+ * localhomtable[targetmap[j]][i].  Further records for a cell that is already
+ * in use are chained through ->next, using the cell's .nokori counter and
+ * .last pointer.  A tag starting with 'k' sets kozoarivec[i] and
+ * kozoarivec[j] to 1.
+ *
+ * njob is not used by this function.
+ * Lines that do not provide the eight numeric fields are skipped.
+ * Exits with status 1 when targetmap[i] is -1.
+ */
+void readlocalhomtable2_target( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec, int *targetmap )
+{
+	double opt;
+	static char buff[B];
+	char infor[100];
+	int i, j, overlapaa, start1, end1, start2, end2, it, jt;
+	LocalHom *tmpptr1, *tmpptr2;
+
+	while ( NULL != fgets( buff, B-1, fp ) )
+	{
+		// A line without a tag must not inherit the tag of the previous line.
+		infor[0] = 0;
+		// %99s keeps the tag inside infor[100]; blank or malformed lines are
+		// skipped rather than processed with stale or uninitialized values.
+		if( sscanf( buff, "%d %d %d %lf %d %d %d %d %99s", &i, &j, &overlapaa, &opt, &start1, &end1, &start2, &end2, infor ) < 8 ) continue;
+		if( *infor == 'k' ) kozoarivec[i] = kozoarivec[j] = 1;
+
+		it = targetmap[i];
+		if( it == -1 )
+		{
+			reporterr( "hat3 ga okashii. _target_ \n" );
+			exit( 1 );
+		}
+		jt = targetmap[j];
+
+		// Forward entry, cell (it, j).
+		if( localhomtable[it][j].nokori++ > 0 )
+		{
+			// Cell already used: append a new node after the recorded tail.
+			tmpptr1 = localhomtable[it][j].last;
+			tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+			tmpptr1 = tmpptr1->next;
+			tmpptr1->extended = -1;
+			tmpptr1->next = NULL;
+			localhomtable[it][j].last = tmpptr1;
+		}
+		else
+		{
+			tmpptr1 = localhomtable[it]+j;
+		}
+
+		tmpptr1->start1 = start1;
+		tmpptr1->start2 = start2;
+		tmpptr1->end1 = end1;
+		tmpptr1->end2 = end2;
+		tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600;
+		tmpptr1->overlapaa = overlapaa;
+		tmpptr1->korh = *infor;
+
+		// Mirrored entry, cell (jt, i), only when j is itself a target.
+		if( jt != -1 )
+		{
+			if( localhomtable[jt][i].nokori++ > 0 )
+			{
+				tmpptr2 = localhomtable[jt][i].last;
+				tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+				tmpptr2 = tmpptr2->next;
+				tmpptr2->extended = -1;
+				tmpptr2->next = NULL;
+				localhomtable[jt][i].last = tmpptr2;
+			}
+			else
+			{
+				tmpptr2 = localhomtable[jt]+i;
+			}
+
+			// Regions are swapped relative to the forward entry.
+			tmpptr2->start2 = start1;
+			tmpptr2->start1 = start2;
+			tmpptr2->end2 = end1;
+			tmpptr2->end1 = end2;
+			tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600;
+			tmpptr2->overlapaa = overlapaa;
+			tmpptr2->korh = *infor;
+		}
+	}
+}
+
+/*
+ * Read local-homology (hat3) records into a half (upper-triangular) table.
+ *
+ * Each input line carries: i j overlapaa opt start1 end1 start2 end2 [tag].
+ * The record for the pair (i, j), j > i, is stored at localhomtable[i][j-i].
+ * Further records for a cell that is already in use are chained through
+ * ->next, using the cell's .nokori counter and .last pointer.  A tag starting
+ * with 'k' sets kozoarivec[i] and kozoarivec[j] to 1.
+ *
+ * njob is not used by this function.
+ * Lines that do not provide the eight numeric fields are skipped.
+ * Exits with status 1 when j <= i.
+ */
+void readlocalhomtable2_half( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec )
+{
+	double opt;
+	static char buff[B];
+	char infor[100];
+	int i, j, overlapaa, start1, end1, start2, end2;
+	LocalHom *tmpptr1;
+
+	while ( NULL != fgets( buff, B-1, fp ) )
+	{
+		// A line without a tag must not inherit the tag of the previous line.
+		infor[0] = 0;
+		// %99s keeps the tag inside infor[100]; blank or malformed lines are
+		// skipped rather than processed with stale or uninitialized values.
+		if( sscanf( buff, "%d %d %d %lf %d %d %d %d %99s", &i, &j, &overlapaa, &opt, &start1, &end1, &start2, &end2, infor ) < 8 ) continue;
+		if( *infor == 'k' ) kozoarivec[i] = kozoarivec[j] = 1;
+
+		if( j <= i )
+		{
+			reporterr( "Check hat3. The first sequence must be younger than the second one.\n" );
+			exit( 1 );
+		}
+
+		if( localhomtable[i][j-i].nokori++ > 0 )
+		{
+			// Cell already used: append a new node after the recorded tail.
+			tmpptr1 = localhomtable[i][j-i].last;
+			tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+			tmpptr1 = tmpptr1->next;
+			tmpptr1->extended = -1;
+			tmpptr1->next = NULL;
+			localhomtable[i][j-i].last = tmpptr1;
+		}
+		else
+		{
+			tmpptr1 = localhomtable[i]+j-i;
+		}
+
+		tmpptr1->start1 = start1;
+		tmpptr1->start2 = start2;
+		tmpptr1->end1 = end1;
+		tmpptr1->end2 = end2;
+		tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600;
+		tmpptr1->overlapaa = overlapaa;
+		tmpptr1->korh = *infor;
+	}
+}
+
+void readlocalhomtable2( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec )
{
double opt;
static char buff[B];
}
}
-void readlocalhomtable( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec )
+
+#if 0
+void readlocalhomtable_target( FILE*fp, int ntarget, int njob, LocalHom **localhomtable, char *kozoarivec, int *targetmap )
{
double opt;
static char buff[B];
char infor[100];
- int i, j, overlapaa, start1, end1, start2, end2;
+ int i, j, overlapaa, start1, end1, start2, end2, it, jt;
int **nlocalhom = NULL;
LocalHom *tmpptr1=NULL, *tmpptr2=NULL; // by D.Mathog, a guess
nlocalhom = AllocateIntMtx( njob, njob );
- for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) nlocalhom[i][j] = 0;
+ for( i=0; i<ntarget; i++ ) for( j=0; j<njob; j++ ) nlocalhom[i][j] = 0;
while ( NULL != fgets( buff, B-1, fp ) )
{
if( start1 == end1 || start2 == end2 ) continue; //mondai ari
#endif
+ printf( "reading %d-%d\n", i, j );
+
+ it = targetmap[i];
+ if( it == -1 )
+ {
+ reporterr( "hat3 ga okashii. _target_ \n" );
+ exit( 1 );
+ }
+ jt = targetmap[j];
// if( i < j )
{
+ if( nlocalhom[it][j]++ > 0 )
+ {
+ printf( "extending %d-%d, ->%d\n", i, j, nlocalhom[it][j] );
+// fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] );
+ tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+ tmpptr1 = tmpptr1->next;
+ tmpptr1->next = NULL;
+ }
+ else
+ {
+ tmpptr1 = localhomtable[it]+j;
+// fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] );
+ }
+
+ tmpptr1->start1 = start1; // CHUUI!!!!
+ tmpptr1->start2 = start2;
+ tmpptr1->end1 = end1; // CHUUI!!!!
+ tmpptr1->end2 = end2;
+// tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600;
+// tmpptr1->opt = opt;
+ tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600;
+ tmpptr1->overlapaa = overlapaa;
+ tmpptr1->korh = *infor;
+
+// fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt );
+
+
+ }
+// else
+
+ if( jt != -1 )
+ {
+ if( nlocalhom[jt][i]++ > 0 )
+ {
+ printf( "extending %d-%d, ->%d\n", i, j, nlocalhom[jt][i] );
+ tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+ tmpptr2 = tmpptr2->next;
+ tmpptr2->next = NULL;
+ }
+ else
+ tmpptr2 = localhomtable[jt]+i;
+
+ tmpptr2->start2 = start1; // CHUUI!!!!
+ tmpptr2->start1 = start2;
+ tmpptr2->end2 = end1; // CHUUI!!!!
+ tmpptr2->end1 = end2;
+// tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600;
+// tmpptr2->opt = opt;
+ tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600;
+ tmpptr2->overlapaa = overlapaa;
+ tmpptr2->korh = *infor;
+
+// fprintf( stderr, "j=%d, i=%d, opt = %f\n", j, i, opt );
+ }
+
+ }
+ LocalHom *tmpptr;
+ for( tmpptr = localhomtable[1]+11; tmpptr; tmpptr=tmpptr->next )
+ fprintf( stdout, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f, next=%p\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt / 600 * 5.8, tmpptr->next );
+ FreeIntMtx( nlocalhom );
+}
+
+void readlocalhomtable_half( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec )
+{
+ double opt;
+ static char buff[B];
+ char infor[100];
+ int i, j, overlapaa, start1, end1, start2, end2;
+ int **nlocalhom = NULL;
+ LocalHom *tmpptr1=NULL; // by D.Mathog, a guess
+
+ nlocalhom = AllocateIntMtx( njob, njob );
+ for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) nlocalhom[i][j] = 0;
+
+ while ( NULL != fgets( buff, B-1, fp ) )
+ {
+// fprintf( stderr, "\n" );
+ sscanf( buff, "%d %d %d %lf %d %d %d %d %s", &i, &j, &overlapaa, &opt, &start1, &end1, &start2, &end2, infor );
+ if( *infor == 'k' ) kozoarivec[i] = kozoarivec[j] = 1;
+
+#if 0
+ if( start1 == end1 || start2 == end2 ) continue; //mondai ari
+#endif
+
+
+ if( j <= i )
+ {
+ reporterr( "Check hat3. The first sequence must be younger than the second one.\n" );
+ exit( 1 );
+ }
+ {
if( nlocalhom[i][j]++ > 0 )
{
// fprintf( stderr, "reallocating, nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] );
}
else
{
- tmpptr1 = localhomtable[i]+j;
+ tmpptr1 = localhomtable[i]+j-i;
// fprintf( stderr, "nlocalhom[%d][%d] = %d\n", i, j, nlocalhom[i][j] );
}
- tmpptr1->start1 = start1;
+ tmpptr1->start1 = start1; // CHUUI!!!!
tmpptr1->start2 = start2;
- tmpptr1->end1 = end1;
+ tmpptr1->end1 = end1; // CHUUI!!!!
tmpptr1->end2 = end2;
// tmpptr1->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600;
// tmpptr1->opt = opt;
// fprintf( stderr, "i=%d, j=%d, opt = %f\n", i, j, opt );
}
-// else
+ }
+ FreeIntMtx( nlocalhom );
+}
+#endif
+
+/*
+ * Read local-homology (hat3) records into a full njob x njob table.
+ *
+ * Each input line carries: i j overlapaa opt start1 end1 start2 end2 [tag].
+ * The record is stored at localhomtable[i][j] and mirrored, with the two
+ * regions swapped, at localhomtable[j][i].  A tag starting with 'k' sets
+ * kozoarivec[i] and kozoarivec[j] to 1.
+ *
+ * Additional records for a pair are appended after the node written most
+ * recently (tmpptr1 / tmpptr2 persist across lines), so all records of one
+ * pair have to appear on consecutive lines.
+ *
+ * Lines that do not provide the eight numeric fields are skipped.
+ * Exits with status 1 when j <= i.
+ */
+void readlocalhomtable( FILE*fp, int njob, LocalHom **localhomtable, char *kozoarivec )
+{
+	double opt;
+	static char buff[B];
+	char infor[100];
+	int i, j, overlapaa, start1, end1, start2, end2;
+	int **nlocalhom = NULL;
+	LocalHom *tmpptr1=NULL, *tmpptr2=NULL; // by D.Mathog, a guess
+
+	// Per-pair record counters, used only to tell first records from repeats.
+	nlocalhom = AllocateIntMtx( njob, njob );
+	for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) nlocalhom[i][j] = 0;
+
+	while ( NULL != fgets( buff, B-1, fp ) )
+	{
+		// A line without a tag must not inherit the tag of the previous line.
+		infor[0] = 0;
+		// %99s keeps the tag inside infor[100]; blank or malformed lines are
+		// skipped rather than processed with stale or uninitialized values.
+		if( sscanf( buff, "%d %d %d %lf %d %d %d %d %99s", &i, &j, &overlapaa, &opt, &start1, &end1, &start2, &end2, infor ) < 8 ) continue;
+		if( *infor == 'k' ) kozoarivec[i] = kozoarivec[j] = 1;
+
+		if( j <= i )
+		{
+			reporterr( "Check hat3. The first sequence must be younger than the second one.\n" );
+			exit( 1 );
+		}
+
+		// Forward entry (i, j).
+		if( nlocalhom[i][j]++ > 0 )
+		{
+			tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+			tmpptr1 = tmpptr1->next;
+			tmpptr1->next = NULL;
+		}
+		else
+		{
+			tmpptr1 = localhomtable[i]+j;
+		}
+
+		tmpptr1->start1 = start1; // CAUTION
+		tmpptr1->start2 = start2;
+		tmpptr1->end1 = end1; // CAUTION
+		tmpptr1->end2 = end2;
+		tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600;
+		tmpptr1->overlapaa = overlapaa;
+		tmpptr1->korh = *infor;
+
+		// Mirrored entry (j, i) with the regions swapped.
+		if( nlocalhom[j][i]++ > 0 )
+		{
+			tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+			tmpptr2 = tmpptr2->next;
+			tmpptr2->next = NULL;
+		}
+		else
+			tmpptr2 = localhomtable[j]+i;
+
+		tmpptr2->start2 = start1; // CAUTION
+		tmpptr2->start1 = start2;
+		tmpptr2->end2 = end1; // CAUTION
+		tmpptr2->end1 = end2;
+		tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600;
+		tmpptr2->overlapaa = overlapaa;
+		tmpptr2->korh = *infor;
+	}
+	FreeIntMtx( nlocalhom );
+}
+
+
+/*
+ * Read local-homology (hat3) records between original and added sequences
+ * into two rectangular tables.  Marked "for test only" by the author.
+ *
+ * Each input line carries: i j overlapaa opt start1 end1 start2 end2 [tag],
+ * where i indexes an original sequence and j-norg an added one.
+ * The record is stored at localhomtable[i][j-norg] and mirrored, with the
+ * two regions swapped and start/end of the first region shifted by +1, at
+ * localhomtablex[j-norg][i].
+ *
+ * Additional records for a pair are appended after the node written most
+ * recently, so all records of one pair have to appear on consecutive lines.
+ *
+ * kozoarivec is not used.  Lines that do not provide the eight numeric
+ * fields are skipped.
+ * Exits with status 1 on a tag starting with 'k' or on indices outside the
+ * norg x nadd range.
+ */
+void readlocalhomtable_two( FILE*fp, int norg, int nadd, LocalHom **localhomtable, LocalHom **localhomtablex, char *kozoarivec ) // for test only
+{
+	double opt;
+	static char buff[B];
+	char infor[100];
+	int i, j, overlapaa, start1, end1, start2, end2;
+	int **nlocalhom = NULL;
+	int **nlocalhomx = NULL;
+	LocalHom *tmpptr1=NULL, *tmpptr2=NULL; // by D.Mathog, a guess
+
+	// Per-pair record counters for the forward and the mirrored table.
+	nlocalhom = AllocateIntMtx( norg, nadd );
+	for( i=0; i<norg; i++ ) for( j=0; j<nadd; j++ ) nlocalhom[i][j] = 0;
+	nlocalhomx = AllocateIntMtx( nadd, norg );
+	for( i=0; i<nadd; i++ ) for( j=0; j<norg; j++ ) nlocalhomx[i][j] = 0;
+
+	while ( NULL != fgets( buff, B-1, fp ) )
+	{
+		// A line without a tag must not inherit the tag of the previous line.
+		infor[0] = 0;
+		// %99s keeps the tag inside infor[100]; blank or malformed lines are
+		// skipped rather than processed with stale or uninitialized values.
+		if( sscanf( buff, "%d %d %d %lf %d %d %d %d %99s", &i, &j, &overlapaa, &opt, &start1, &end1, &start2, &end2, infor ) < 8 ) continue;
+		if( *infor == 'k' )
+		{
+			fprintf( stderr, "Not supported!\n" );
+			exit( 1 );
+		}
+		j -= norg;
+
+		// The counters and tables only cover norg x nadd cells.
+		if( i < 0 || i >= norg || j < 0 || j >= nadd )
+		{
+			fprintf( stderr, "Sequence index out of range in hat3\n" );
+			exit( 1 );
+		}
+
+		// Forward entry (i, j).
+		if( nlocalhom[i][j]++ > 0 )
+		{
+			tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+			tmpptr1 = tmpptr1->next;
+			tmpptr1->next = NULL;
+		}
+		else
+		{
+			tmpptr1 = localhomtable[i]+j;
+		}
+
+		tmpptr1->start1 = start1; // CAUTION
+		tmpptr1->start2 = start2;
+		tmpptr1->end1 = end1; // CAUTION
+		tmpptr1->end2 = end2;
+		tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600;
+		tmpptr1->overlapaa = overlapaa;
+		tmpptr1->korh = *infor;
+
+		// Mirrored entry (j, i) in the second table.
+		if( nlocalhomx[j][i]++ > 0 )
+		{
+			tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+			tmpptr2 = tmpptr2->next;
+			tmpptr2->next = NULL;
+		}
+		else
+			tmpptr2 = localhomtablex[j]+i;
+
+		tmpptr2->start2 = start1+1; // CAUTION: shifted by one, unlike the forward entry
+		tmpptr2->start1 = start2;
+		tmpptr2->end2 = end1+1; // CAUTION: shifted by one, unlike the forward entry
+		tmpptr2->end1 = end2;
+		tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600;
+		tmpptr2->overlapaa = overlapaa;
+		tmpptr2->korh = *infor;
+	}
+	FreeIntMtx( nlocalhom );
+	FreeIntMtx( nlocalhomx );
+}
+
+/*
+ * Read local-homology (hat3) records between original and added sequences
+ * into one rectangular table.  Marked "for test only" by the author.
+ *
+ * Each input line carries: i j overlapaa opt start1 end1 start2 end2 [tag],
+ * where i indexes an original sequence and j-norg an added one.
+ * The record is stored at localhomtable[i][j-norg].
+ *
+ * Additional records for a pair are appended after the node written most
+ * recently, so all records of one pair have to appear on consecutive lines.
+ *
+ * kozoarivec is not used.  Lines that do not provide the eight numeric
+ * fields are skipped.
+ * Exits with status 1 on a tag starting with 'k' or on indices outside the
+ * norg x nadd range.
+ */
+void readlocalhomtable_one( FILE*fp, int norg, int nadd, LocalHom **localhomtable, char *kozoarivec ) // for test only
+{
+	double opt;
+	static char buff[B];
+	char infor[100];
+	int i, j, overlapaa, start1, end1, start2, end2;
+	int **nlocalhom = NULL;
+	LocalHom *tmpptr1=NULL; // by D.Mathog, a guess
+
+	// Per-pair record counters, used only to tell first records from repeats.
+	nlocalhom = AllocateIntMtx( norg, nadd );
+	for( i=0; i<norg; i++ ) for( j=0; j<nadd; j++ ) nlocalhom[i][j] = 0;
+
+	while ( NULL != fgets( buff, B-1, fp ) )
+	{
+		// A line without a tag must not inherit the tag of the previous line.
+		infor[0] = 0;
+		// %99s keeps the tag inside infor[100]; blank or malformed lines are
+		// skipped rather than processed with stale or uninitialized values.
+		if( sscanf( buff, "%d %d %d %lf %d %d %d %d %99s", &i, &j, &overlapaa, &opt, &start1, &end1, &start2, &end2, infor ) < 8 ) continue;
+		if( *infor == 'k' )
+		{
+			fprintf( stderr, "Not supported!\n" );
+			exit( 1 );
+		}
+		j -= norg;
+
+		// The counters and the table only cover norg x nadd cells.
+		if( i < 0 || i >= norg || j < 0 || j >= nadd )
+		{
+			fprintf( stderr, "Sequence index out of range in hat3\n" );
+			exit( 1 );
+		}
+
+		if( nlocalhom[i][j]++ > 0 )
+		{
+			tmpptr1->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+			tmpptr1 = tmpptr1->next;
+			tmpptr1->next = NULL;
+		}
+		else
+		{
+			tmpptr1 = localhomtable[i]+j;
+		}
+
+		tmpptr1->start1 = start1; // CAUTION
+		tmpptr1->start2 = start2;
+		tmpptr1->end1 = end1; // CAUTION
+		tmpptr1->end2 = end2;
+		tmpptr1->opt = ( opt + 0.00 ) / 5.8 * 600;
+		tmpptr1->overlapaa = overlapaa;
+		tmpptr1->korh = *infor;
+	}
+	FreeIntMtx( nlocalhom );
+}
+
+/*
+ * Dump an (norg x nadd) local-homology table to stdout.
+ * For every cell a "row-col" header is printed (the column is offset by
+ * norg), followed by one line per node of the cell's ->next chain.
+ * opt is printed rescaled by /600*5.8.
+ */
+void outlocalhom_part( LocalHom **localhom, int norg, int nadd )
+{
+	int row, col;
+	LocalHom *node;
+
+	for( row=0; row<norg; row++ )
+	{
+		for( col=0; col<nadd; col++ )
+		{
+			node = &localhom[row][col];
+			fprintf( stdout, "%d-%d\n", row, col+norg );
+			// The cell itself is always printed; then follow the chain.
+			for( ;; )
+			{
+				fprintf( stdout, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f\n", node->start1, node->end1, node->start2, node->end2, node->importance, node->opt / 600 * 5.8 );
+				node = node->next;
+				if( node == NULL ) break;
+			}
+		}
+	}
+}
+
+/*
+ * Dump an (norg x nadd) local-homology table to stdout.
+ * For every cell a "row-col" header is printed, followed by one line per
+ * node of the cell's ->next chain, including the address of the next node.
+ * opt is printed rescaled by /600*5.8.
+ */
+void outlocalhom_target( LocalHom **localhom, int norg, int nadd )
+{
+	int row, col;
+	LocalHom *node;
+
+	for( row=0; row<norg; row++ )
+	{
+		for( col=0; col<nadd; col++ )
+		{
+			fprintf( stdout, "%d-%d\n", row, col );
+			node = &localhom[row][col];
+			while( node )
+			{
+				fprintf( stdout, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f, next=%p\n", node->start1, node->end1, node->start2, node->end2, node->importance, node->opt / 600 * 5.8, (void *)node->next );
+				node = node->next;
+			}
+		}
+	}
+}
+
+/*
+ * Dump a half (upper-triangular) local-homology table to stdout.
+ * The pair (i, j), i < j < nseq, is read from localhom[i][j-i].  For every
+ * pair an "i-j" header is printed, followed by one line per node of the
+ * cell's ->next chain.  opt is printed rescaled by /600*5.8.
+ */
+void outlocalhom_half( LocalHom **localhom, int nseq )
+{
+	int first, second;
+	LocalHom *node;
+
+	for( first=0; first<nseq-1; first++ )
+	{
+		for( second=first+1; second<nseq; second++ )
+		{
+			node = &localhom[first][second-first];
+			fprintf( stdout, "%d-%d\n", first, second );
+			// The cell itself is always printed; then follow the chain.
+			for( ;; )
+			{
+				fprintf( stdout, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f, next=%p\n", node->start1, node->end1, node->start2, node->end2, node->importance, node->opt / 600 * 5.8, (void *)node->next );
+				node = node->next;
+				if( node == NULL ) break;
+			}
+		}
+	}
+}
+
+/*
+ * Dump a full nseq x nseq local-homology table to stderr.
+ * For every cell an "i-j" header is printed, followed by one line per node
+ * of the cell's ->next chain.  opt is printed as stored.
+ */
+void outlocalhom( LocalHom **localhom, int nseq )
+{
+	int row, col;
+	LocalHom *node;
+
+	for( row=0; row<nseq; row++ )
+	{
+		for( col=0; col<nseq; col++ )
+		{
+			node = &localhom[row][col];
+			fprintf( stderr, "%d-%d\n", row, col );
+			// The cell itself is always printed; then follow the chain.
+			for( ;; )
+			{
+				fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f\n", node->start1, node->end1, node->start2, node->end2, node->importance, node->opt );
+				node = node->next;
+				if( node == NULL ) break;
+			}
+		}
+	}
+}
+
+/*
+ * Dump an n1 x n2 table of local-homology chain heads to stdout.
+ * localhom[i][j] is a pointer to the first node; it is dereferenced without
+ * a NULL check.  One line per node, prefixed with "i-j"; opt is printed as
+ * stored.
+ */
+void outlocalhompt( LocalHom ***localhom, int n1, int n2 )
+{
+	int row, col;
+	LocalHom *node;
+
+	for( row=0; row<n1; row++ )
+	{
+		for( col=0; col<n2; col++ )
+		{
+			node = localhom[row][col];
+			// The head is always printed; then follow the chain.
+			for( ;; )
+			{
+				fprintf( stdout, "%d-%d, reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f\n", row, col, node->start1, node->end1, node->start2, node->end2, node->importance, node->opt );
+				node = node->next;
+				if( node == NULL ) break;
+			}
+		}
+	}
+}
+
+/*
+ * Release an n x m local-homology table.
+ * For every cell the nodes chained behind it through ->next are freed (the
+ * cell itself lives inside the row array and is released with the row),
+ * then each row, and finally the array of row pointers.
+ * With DEBUG enabled, progress is traced on stderr.
+ */
+void FreeLocalHomTable_part( LocalHom **localhomtable, int n, int m )
+{
+	int row, col;
+	LocalHom *head, *node, *following;
+
+	for( row=0; row<n; row++ )
+	{
+		for( col=0; col<m; col++ )
+		{
+			head = &localhomtable[row][col];
+			node = head;
+			while( node )
+			{
+#if DEBUG
+				fprintf( stderr, "i=%d, j=%d\n", row, col );
+#endif
+				// Remember the successor before the node may be released.
+				following = node->next;
+				if( node != head )
+				{
+#if DEBUG
+					fprintf( stderr, "freeing %p\n", node );
+#endif
+					free( node );
+				}
+				node = following;
+			}
+		}
+#if DEBUG
+		fprintf( stderr, "freeing localhomtable[%d]\n", row );
+#endif
+		free( localhomtable[row] );
+	}
+#if DEBUG
+	fprintf( stderr, "freeing localhomtable\n" );
+#endif
+	free( localhomtable );
+#if DEBUG
+	fprintf( stderr, "freed\n" );
+#endif
+}
+
+// Release a local-homology table made of two parts: rows 0..n-1 with m cells
+// each, followed by rows n..n+m-1 with n cells each.
+// For every cell the nodes chained behind it through ->next are freed (the
+// cell itself is part of the row array and is released with the row), then
+// each row, and finally the array of row pointers.
+// With DEBUG enabled, progress is traced on stderr.
+void FreeLocalHomTable_two( LocalHom **localhomtable, int n, int m )
+{
+ int i, j;
+ LocalHom *ppp, *tmpptr;
+ for( i=0; i<n; i++ )
+ {
+ for( j=0; j<m; j++ )
+ {
+ tmpptr=localhomtable[i]+j;
+ ppp = tmpptr->next;
+ for( ; tmpptr; tmpptr=ppp )
+ {
+#if DEBUG
+ fprintf( stderr, "i=%d, j=%d\n", i, j );
+#endif
+ ppp = tmpptr->next;
+ if( tmpptr!=localhomtable[i]+j )
+ {
+#if DEBUG
+ fprintf( stderr, "freeing %p\n", tmpptr );
+#endif
+ free( tmpptr );
+ }
+ }
+ }
+#if DEBUG
+ fprintf( stderr, "freeing localhomtable[%d]\n", i );
+#endif
+ free( localhomtable[i] );
+ }
+
+ for( i=n; i<n+m; i++ )
+ {
+ for( j=0; j<n; j++ )
{
- if( nlocalhom[j][i]++ > 0 )
+ tmpptr=localhomtable[i]+j;
+ ppp = tmpptr->next;
+ for( ; tmpptr; tmpptr=ppp )
{
- tmpptr2->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
- tmpptr2 = tmpptr2->next;
- tmpptr2->next = NULL;
+#if DEBUG
+ fprintf( stderr, "i=%d, j=%d\n", i, j );
+#endif
+ ppp = tmpptr->next;
+ if( tmpptr!=localhomtable[i]+j )
+ {
+#if DEBUG
+ fprintf( stderr, "freeing %p\n", tmpptr );
+#endif
+ free( tmpptr );
+ }
}
- else
- tmpptr2 = localhomtable[j]+i;
-
- tmpptr2->start2 = start1;
- tmpptr2->start1 = start2;
- tmpptr2->end2 = end1;
- tmpptr2->end1 = end2;
-// tmpptr2->opt = ( opt / overlapaa + 0.00 ) / 5.8 * 600;
-// tmpptr2->opt = opt;
- tmpptr2->opt = ( opt + 0.00 ) / 5.8 * 600;
- tmpptr2->overlapaa = overlapaa;
- tmpptr2->korh = *infor;
}
-
+#if DEBUG
+ fprintf( stderr, "freeing localhomtable[%d]\n", i );
+#endif
+ free( localhomtable[i] );
}
- FreeIntMtx( nlocalhom );
+#if DEBUG
+ fprintf( stderr, "freeing localhomtable\n" );
+#endif
+ free( localhomtable );
+#if DEBUG
+ fprintf( stderr, "freed\n" );
+#endif
}
-void outlocalhom( LocalHom **localhom, int nseq )
+// Release an n x m local-homology table.
+// For every cell the nodes chained behind it through ->next are freed (the
+// cell itself is part of the row array and is released with the row), then
+// each row, and finally the array of row pointers.
+// With DEBUG enabled, progress is traced on stderr.
+void FreeLocalHomTable_one( LocalHom **localhomtable, int n, int m )
{
int i, j;
- LocalHom *tmpptr;
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
+ LocalHom *ppp, *tmpptr;
+ for( i=0; i<n; i++ )
{
- tmpptr = localhom[i]+j;
- fprintf( stderr, "%d-%d\n", i, j );
- do
+ for( j=0; j<m; j++ )
{
- fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt );
+ tmpptr=localhomtable[i]+j;
+ ppp = tmpptr->next;
+ for( ; tmpptr; tmpptr=ppp )
+ {
+#if DEBUG
+ fprintf( stderr, "i=%d, j=%d\n", i, j );
+#endif
+ ppp = tmpptr->next;
+ if( tmpptr!=localhomtable[i]+j )
+ {
+#if DEBUG
+ fprintf( stderr, "freeing %p\n", tmpptr );
+#endif
+ free( tmpptr );
+ }
+ }
}
- while( (tmpptr=tmpptr->next) );
+#if DEBUG
+ fprintf( stderr, "freeing localhomtable[%d]\n", i );
+#endif
+ free( localhomtable[i] );
}
+
+#if DEBUG
+ fprintf( stderr, "freeing localhomtable\n" );
+#endif
+ free( localhomtable );
+#if DEBUG
+ fprintf( stderr, "freed\n" );
+#endif
}
-void outlocalhompt( LocalHom ***localhom, int n1, int n2 )
+// Release a half (triangular) local-homology table: row i holds n-i cells.
+// For every cell the nodes chained behind it through ->next are freed (the
+// cell itself is part of the row array and is released with the row), then
+// each row, and finally the array of row pointers.
+// With DEBUG enabled, progress is traced on stderr.
+void FreeLocalHomTable_half( LocalHom **localhomtable, int n )
{
int i, j;
- LocalHom *tmpptr;
- for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ LocalHom *ppp, *tmpptr;
+ for( i=0; i<n; i++ )
{
- tmpptr = localhom[i][j];
- fprintf( stderr, "%d-%d\n", i, j );
- do
+ for( j=0; j<n-i; j++ )
{
- fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f, opt=%f, wimp=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->importance, tmpptr->opt, tmpptr->wimportance );
+ tmpptr=localhomtable[i]+j;
+ ppp = tmpptr->next;
+ for( ; tmpptr; tmpptr=ppp )
+ {
+#if DEBUG
+ fprintf( stderr, "i=%d, j=%d\n", i, j );
+#endif
+ ppp = tmpptr->next;
+ if( tmpptr!=localhomtable[i]+j )
+ {
+#if DEBUG
+ fprintf( stderr, "freeing %p\n", tmpptr );
+#endif
+ free( tmpptr );
+ }
+ }
}
- while( (tmpptr=tmpptr->next) );
+#if DEBUG
+ fprintf( stderr, "freeing localhomtable[%d]\n", i );
+#endif
+ free( localhomtable[i] );
}
+#if DEBUG
+ fprintf( stderr, "freeing localhomtable\n" );
+#endif
+ free( localhomtable );
+#if DEBUG
+ fprintf( stderr, "freed\n" );
+#endif
}
-
void FreeLocalHomTable( LocalHom **localhomtable, int n )
{
int i, j;
return( val );
}
-void phylipout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, int *order )
+void phylipout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, int *order, int namelen )
{
int pos, pos2, j;
+ if( namelen == -1 ) namelen = 10;
pos = 0;
fprintf( fp, " %d %d\n", nseq, maxlen );
for( j=0; j<nseq; j++ )
{
if( pos == 0 )
- fprintf( fp, "%-10.10s", extractfirstword( name[order[j]]+1 ) );
+ fprintf( fp, "%-*.*s", namelen, namelen, extractfirstword( name[order[j]]+1 ) );
else
- fprintf( fp, " " );
+ fprintf( fp, "%-*.*s", namelen, namelen, "" );
pos2 = pos;
while( pos2 < pos+41 && pos2 < maxlen )
void clustalout_pointer( FILE *fp, int nseq, int maxlen, char **seq, char **name, char *mark, char *comment, int *order, int namelen )
{
int pos, j;
+ if( namelen == -1 ) namelen = 15;
pos = 0;
if( comment == NULL )
fprintf( fp, "CLUSTAL format alignment by MAFFT (v%s)\n\n", VERSION );
return( val );
}
+// Replace every tab character in s with a space, in place (a precaution so
+// that later tokenizing only has to deal with spaces).
+static void tab2space( char *s )
+{
+	char *cur;
+
+	for( cur=s; *cur != '\0'; cur++ )
+		if( *cur == '\t' ) *cur = ' ';
+}
+
+/*
+ * Parse one line of whitespace-separated, 1-based sequence numbers.
+ *
+ * s            : the line; tabs are converted to spaces in place.
+ * t            : output; receives the 0-based numbers followed by a -1
+ *                terminator, so it needs room for one more entry than there
+ *                are tokens.
+ * preservegaps : output; set to 1 when any number is negative (its absolute
+ *                value is stored), otherwise 0.
+ *
+ * Parsing stops at a token that starts with '\n' or '#'.
+ * Returns the number of entries stored.  Exits with status 1 when a token
+ * evaluates to 0.
+ */
+static int readasubalignment( char *s, int *t, int *preservegaps )
+{
+	int count = 0;
+	int intoken = 0;	// 1 while scanning inside a token that was already converted
+	int id;
+	char *cur;
+
+	*preservegaps = 0;
+	tab2space( s );
+	for( cur=s; *cur; cur++ )
+	{
+		if( *cur == ' ' )
+		{
+			intoken = 0;
+			continue;
+		}
+		if( intoken ) continue;
+
+		// First character of a new token.
+		if( *cur == '\n' || *cur == '#' ) break;
+		intoken = 1;
+		id = atoi( cur );
+		if( id == 0 )
+		{
+			fprintf( stderr, "Format error? Sequences must be specified as 1, 2, 3...\n" );
+			exit( 1 );
+		}
+		if( id < 0 ) *preservegaps = 1;
+		t[count++] = abs( id ) - 1;
+	}
+	t[count] = -1;
+	return( count );
+}
+
+/*
+ * Count the whitespace-separated tokens on one line.
+ *
+ * s : the line; tabs are converted to spaces in place.
+ *
+ * Counting stops at a token that starts with '\n' or '#'.
+ * Returns the number of tokens.  Exits with status 1 when a token evaluates
+ * to 0 under atoi().
+ */
+static int countspace( char *s )
+{
+	int count = 0;
+	int intoken = 0;	// 1 while scanning inside a token that was already counted
+	char *cur;
+
+	tab2space( s );
+	for( cur=s; *cur; cur++ )
+	{
+		if( *cur == ' ' )
+		{
+			intoken = 0;
+			continue;
+		}
+		if( intoken ) continue;
+
+		// First character of a new token.
+		if( *cur == '\n' || *cur == '#' ) break;
+		count++;
+		intoken = 1;
+		if( atoi( cur ) == 0 )
+		{
+			fprintf( stderr, "Format error? Sequences should be specified as 1, 2, 3...\n" );
+			exit( 1 );
+		}
+	}
+	return( count );
+}
+
+
+// Fetch the next group line of the subalignments table into line.
+// Lines starting with '#' and lines for which atoi() yields 0 are skipped.
+// Returns 1 when a line is available, 0 at end of input.  Exits with status 1
+// when a line did not fit into the buffer.
+static int getsubalignmentline( char *line, int linelen, FILE *fp )
+{
+	size_t len;
+
+	while( fgets( line, linelen-1, fp ) )
+	{
+		len = strlen( line );
+		// No trailing newline means the line was cut off, unless it is simply
+		// the last line of a file that does not end with a newline.
+		if( len > 0 && line[len-1] != '\n' && !feof( fp ) )
+		{
+			fprintf( stderr, "too long line? \n" );
+			exit( 1 );
+		}
+		if( line[0] == '#' ) continue;
+		if( atoi( line ) == 0 ) continue;
+		return( 1 );
+	}
+	return( 0 );
+}
+
+/*
+ * Parse the file "_subalignmentstable" in the current directory.
+ *
+ * The mode is selected by table:
+ *   table == NULL : counting pass.  *nsubpt receives the number of group
+ *                   lines and *maxmempt the largest number of members found
+ *                   on one line.
+ *   table != NULL : reading pass.  Group line k is parsed into table[k]
+ *                   (0-based sequence numbers terminated by -1) and
+ *                   preservegaps[k].  The caller provides enough rows and
+ *                   row capacity (presumably sized from the counting pass).
+ *
+ * nseq is the number of input sequences; every number must be <= nseq and
+ * may appear in one group only.
+ * Exits with status 1 when the file cannot be opened, memory cannot be
+ * allocated, a line is too long, a sequence appears twice, or a sequence
+ * number exceeds nseq.
+ */
+void readsubalignmentstable( int nseq, int **table, int *preservegaps, int *nsubpt, int *maxmempt )
+{
+	FILE *fp;
+	char *line;
+	int linelen = 1000000;
+	int nmem;
+	int lpos;
+	int i, p;
+	int *tab01;
+
+	fp = fopen( "_subalignmentstable", "r" );
+	if( !fp )
+	{
+		fprintf( stderr, "Cannot open _subalignmentstable\n" );
+		exit( 1 );
+	}
+	line = calloc( linelen, sizeof( char ) );
+	if( !line )
+	{
+		fprintf( stderr, "Cannot allocate line buffer\n" );
+		exit( 1 );
+	}
+
+	if( table == NULL )
+	{
+		*nsubpt = 0;
+		*maxmempt = 0;
+		while( getsubalignmentline( line, linelen, fp ) )
+		{
+			nmem = countspace( line );
+			if( nmem > *maxmempt ) *maxmempt = nmem;
+			(*nsubpt)++;
+		}
+	}
+	else
+	{
+		// tab01[p] is 1 once sequence p has been assigned to a group.
+		tab01 = calloc( nseq, sizeof( int ) );
+		if( !tab01 && nseq > 0 )
+		{
+			fprintf( stderr, "Cannot allocate tab01\n" );
+			exit( 1 );
+		}
+		lpos = 0;
+		while( getsubalignmentline( line, linelen, fp ) )
+		{
+			readasubalignment( line, table[lpos], preservegaps+lpos );
+			for( i=0; (p=table[lpos][i])!=-1; i++ )
+			{
+				// Range check first: tab01 has only nseq entries.
+				if( p > nseq-1 )
+				{
+					fprintf( stderr, "Sequence %d does not exist in the input sequence file.\n", p+1 );
+					exit( 1 );
+				}
+				if( tab01[p] )
+				{
+					fprintf( stderr, "\nSequence %d appears in different groups.\n", p+1 );
+					fprintf( stderr, "Hierarchical grouping is not supported.\n\n" );
+					exit( 1 );
+				}
+				tab01[p] = 1;
+			}
+			lpos++;
+		}
+		free( tab01 );
+	}
+	fclose( fp );
+	free( line );
+}
+
void readmccaskill( FILE *fp, RNApair **pairprob, int length )
{
int *pairnum;
int i;
int left, right;
- float prob;
+ double prob;
int c;
pairnum = (int *)calloc( length, sizeof( int ) );
{
if( c != '>' )
{
- fprintf( stderr, "format error in hat4\n" );
+ fprintf( stderr, "format error in hat4 - 1\n" );
exit( 1 );
}
}
if( feof( fp ) ) break;
c = getc( fp );
ungetc( c, fp );
- if( c == '>' )
+ if( c == '>' || c == EOF )
{
break;
}
fgets( gett, 999, fp );
// fprintf( stderr, "gett = %s\n", gett );
- sscanf( gett, "%d %d %f", &left, &right, &prob );
+ sscanf( gett, "%d %d %lf", &left, &right, &prob );
if( left >= length || right >= length )
{
- fprintf( stderr, "format error in hat4\n" );
+ fprintf( stderr, "format error in hat4 - 2\n" );
exit( 1 );
}
free( maptoseq1 );
free( maptoseq2 );
}
+
+/*
+ * atoi() wrapper: converts `in` with atoi(), but terminates the program
+ * with an error message when `in` is NULL instead of dereferencing it.
+ */
+int myatoi( char *in )
+{
+	if( in != NULL )
+		return( atoi( in ) );
+
+	fprintf( stderr, "Error in myatoi()\n" );
+	exit( 1 );
+}
+
+/*
+ * atof() wrapper: converts `in` with atof(), but terminates the program
+ * with an error message when `in` is NULL instead of dereferencing it.
+ */
+double myatof( char *in )
+{
+	if( in != NULL )
+		return( atof( in ) );
+
+	fprintf( stderr, "Error in myatof()\n" );
+	exit( 1 );
+}
+
+/*
+ * printf-style diagnostic output.
+ *
+ * When the global `gmsg` is non-null the message is written to the file
+ * "maffterr" in the current directory: the first successful call in this
+ * process truncates the file, later calls append to it.  If the file cannot
+ * be opened the message is written to stderr instead, so that vfprintf() is
+ * never called with a NULL stream.  When `gmsg` is null the message goes to
+ * stderr.
+ */
+void reporterr( const char *str, ... )
+{
+//	static int loglen = 0;
+	va_list args;
+
+	if( gmsg )
+	{
+#if 1 // temporary; to be removed later
+		static int logstarted = 0; // 1 once "maffterr" has been created by this process
+		FILE *errtmpfp;
+
+		errtmpfp = fopen( "maffterr", logstarted ? "a" : "w" );
+		va_start( args, str );
+		if( errtmpfp != NULL )
+		{
+			logstarted = 1;
+			vfprintf( errtmpfp, str, args );
+			fclose( errtmpfp );
+		}
+		else
+		{
+			vfprintf( stderr, str, args ); // log file unavailable: do not lose the message
+		}
+		va_end( args );
+#endif
+
+#if 0
+		char *tmpptr;
+		tmpptr = (char *)realloc( *gmsg, (loglen+10000) * sizeof( char ) );
+		if( tmpptr == NULL )
+		{
+			fprintf( stderr, "Cannot relloc *gmsg\n" );
+			exit( 1 );
+		}
+		*gmsg = tmpptr;
+		va_start( args, str );
+		loglen += vsprintf( *gmsg + loglen, str, args );
+		va_end( args );
+
+
+		va_start( args, str );
+		loglen += vsprintf( *gmsg + loglen, str, args );
+		va_end( args );
+		*(*gmsg + loglen) = 0;
+		if( loglen > gmsglen - 100 ) loglen = 0; // rough, arbitrary margin
+#endif
+
+	}
+	else
+	{
+		va_start( args, str );
+		vfprintf( stderr, str, args );
+		va_end( args );
+//		fflush( stderr ); // needed?
+	}
+	return;
+}
+
+
+#ifndef mingw
+/*
+ * Make sure the soft stack-size limit (RLIMIT_STACK) is at least kStackSize.
+ *
+ * If the current soft limit is already large enough it is only reported.
+ * Otherwise the limit is raised to kStackSize with setrlimit(); a failure
+ * to raise it, or to query the limit in the first place, produces a warning
+ * through reporterr() and is otherwise ignored.
+ *
+ * rlim_t is converted explicitly to unsigned long for printing: passing it
+ * straight to a "%d" conversion is a type mismatch in a variadic call.
+ */
+void setstacksize(rlim_t kStackSize )
+{
+	struct rlimit rl;
+	rlim_t originalsize;
+
+	if( getrlimit( RLIMIT_STACK, &rl ) != 0 )
+	{
+		reporterr( "Warning: Cannot check stack size.\n" );
+		return;
+	}
+
+	if( rl.rlim_cur >= kStackSize )
+	{
+		reporterr( "stacksize: %lu kb\n", (unsigned long)( rl.rlim_cur / 1024 ) );
+		return;
+	}
+
+	originalsize = rl.rlim_cur;
+	rl.rlim_cur = kStackSize;
+	reporterr( "stacksize: %lu kb->%lu kb\n", (unsigned long)( originalsize / 1024 ), (unsigned long)( rl.rlim_cur / 1024 ) );
+	if( setrlimit( RLIMIT_STACK, &rl ) != 0 )
+	{
+		reporterr( "Warning: Failed to extend stack size. It's ok in most cases but there may be problems in --pileup and --chainedtree.\n" );
+	}
+}
+#endif
+
+
+
+
+// Disabled debugging aid: everything up to the matching #endif is compiled out.
+#if 0
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/*
+ * Print the ru_maxrss field reported by getrusage(RUSAGE_SELF) to stderr,
+ * divided by 1000 and labelled "MB".
+ * NOTE(review): the unit of ru_maxrss is platform dependent (commonly
+ * kilobytes, bytes on some systems) -- confirm before trusting the label.
+ */
+void use_getrusage(void)
+{
+ struct rusage r;
+ if (getrusage(RUSAGE_SELF, &r) != 0) {
+ /* Failure is ignored. NOTE(review): r is still read below in that case. */
+ }
+ fprintf(stderr, "\nmaxrss = %ld MB\n", r.ru_maxrss/1000);
+}
+
+#endif
-char **align0( float *wm, char **aseq, char *seq, double effarr[M], int icyc, int ex )
+char **align0( double *wm, char **aseq, char *seq, double effarr[M], int icyc, int ex )
{
char **result;
#endif
int cyc[2];
char shindou = 0;
- float wm;
+ double wm;
int returnvalue;
for( i=0; i<locnjob; i++ )
sss[1] = 0;
sss[0] = locnjob-1;
/*
- sss[0] = (int)( (float)locnjob/2.0 );
+ sss[0] = (int)( (double)locnjob/2.0 );
*/
ou = 1;
cyc[0] = 0; cyc[1] = 0;
--- /dev/null
+#! /bin/bash
+
+er=0;
+myself=`dirname "$0"`/`basename "$0"`; export myself
+version="v7.310 (2017/Mar/17)"; export version
+LANG=C; export LANG
+os=`uname`
+progname=`basename "$0"`
+if [ `echo $os | grep -i cygwin` ]; then
+ os="cygwin"
+elif [ `echo $os | grep -i mingw` ]; then
+ os="mingw"
+elif [ `echo $os | grep -i darwin` ]; then
+ os="darwin"
+elif [ `echo $os | grep -i sunos` ]; then
+ os="sunos"
+elif [ `echo $os | grep -i linux` ]; then
+ os="linux"
+else
+ os="unix"
+fi
+export os
+
+# Directory holding the helper binaries: $MAFFT_BINARIES when it is set and
+# non-empty, the built-in location otherwise.
+prefix="${MAFFT_BINARIES:-/usr/local/libexec/mafft}"
+export prefix
+
+# "--man" as the first argument shows the manual page and stops.
+if [ $# -gt 0 ] && [ "$1" = "--man" ]; then
+	man "$prefix/mafft.1"
+	exit 0;
+fi
+
+# Ask the installed binaries for their version; "0.000" stands in when the
+# helper is missing or not executable.
+versionbin="0.000"
+if [ -x "$prefix/version" ]; then
+	versionbin=`"$prefix/version"` # for cygwin 2.7
+fi
+
+# The script version must start with "v" followed by the binary version.
+# On a mismatch, explain the usual cause (a stale MAFFT_BINARIES) and abort.
+if ! expr "$version" : v"$versionbin" > /dev/null ; then
+	cat 1>&2 <<EOF
+
+v$versionbin != $version
+
+There is a problem in the configuration of your shell.
+Check the MAFFT_BINARIES environmental variable by
+\$ echo \$MAFFT_BINARIES
+
+This variable must be *unset*, unless you have installed MAFFT
+with a special configuration.  To unset this variable, type
+\$ unset MAFFT_BINARIES
+or
+% unsetenv MAFFT_BINARIES
+Then retry
+\$ mafft input > output
+
+To keep this change permanently, edit setting files
+(.bash_profile, .profile, .cshrc, etc) in your home directory
+to delete the MAFFT_BINARIES line.
+On MacOSX, also edit or remove the .MacOSX/environment.plist file
+and then re-login (MacOSX 10.6) or reboot (MacOSX 10.7).
+
+Please send a problem report to kazutaka.katoh@aist.go.jp,
+if this problem remains.
+
+EOF
+	exit 1
+fi
+
+defaultiterate=0
+defaultcycle=2
+defaultgop="1.53"
+#defaultaof="0.123"
+defaultaof="0.000"
+defaultlaof="0.100"
+defaultlgop="-2.00"
+defaultfft=1
+defaultrough=0
+defaultdistance="ktuples"
+#defaultdistance="local"
+defaultweighti="2.7"
+defaultweightr="0.0"
+defaultweightm="1.0"
+defaultdafs=0
+defaultmccaskill=0
+defaultcontrafold=0
+defaultalgopt=" "
+defaultalgoptit=" "
+defaultsbstmodel=" -b 62 "
+defaultfmodel=" "
+defaultkappa=" "
+if [ $progname = "xinsi" -o $progname = "mafft-xinsi" ]; then
+ defaultfft=1
+ defaultcycle=1
+ defaultiterate=1000
+ defaultdistance="scarna"
+ defaultweighti="3.2"
+ defaultweightr="8.0"
+ defaultweightm="2.0"
+ defaultmccaskill=1
+ defaultcontrafold=0
+ defaultdafs=0
+ defaultalgopt=" -A "
+ defaultalgoptit=" -AB " ## chui
+ defaultaof="0.0"
+ defaultsbstmodel=" -b 62 "
+ defaultkappa=" "
+ defaultfmodel=" " # 2013/06/18
+elif [ $progname = "qinsi" -o $progname = "mafft-qinsi" ]; then
+ defaultfft=1
+ defaultcycle=1
+ defaultiterate=1000
+ defaultdistance="global"
+ defaultweighti="3.2"
+ defaultweightr="8.0"
+ defaultweightm="2.0"
+ defaultmccaskill=1
+ defaultcontrafold=0
+ defaultdafs=0
+ defaultalgopt=" -A "
+ defaultalgoptit=" -AB " ## chui
+ defaultaof="0.0"
+ defaultsbstmodel=" -b 62 "
+ defaultkappa=" "
+ defaultfmodel=" " # 2013/06/18
+elif [ $progname = "linsi" -o $progname = "mafft-linsi" ]; then
+ defaultfft=0
+ defaultcycle=1
+ defaultiterate=1000
+ defaultdistance="local"
+elif [ $progname = "ginsi" -o $progname = "mafft-ginsi" ]; then
+ defaultfft=1
+ defaultcycle=1
+ defaultiterate=1000
+ defaultdistance="global"
+elif [ $progname = "einsi" -o $progname = "mafft-einsi" ]; then
+ defaultfft=0
+ defaultcycle=1
+ defaultiterate=1000
+ defaultdistance="localgenaf"
+elif [ $progname = "fftns" -o $progname = "mafft-fftns" ]; then
+ defaultfft=1
+ defaultcycle=2
+ defaultdistance="ktuples"
+elif [ $progname = "fftnsi" -o $progname = "mafft-fftnsi" ]; then
+ defaultfft=1
+ defaultcycle=2
+ defaultiterate=2
+ defaultdistance="ktuples"
+elif [ $progname = "nwns" -o $progname = "mafft-nwns" ]; then
+ defaultfft=0
+ defaultcycle=2
+ defaultdistance="ktuples"
+elif [ $progname = "nwnsi" -o $progname = "mafft-nwnsi" ]; then
+ defaultfft=0
+ defaultcycle=2
+ defaultiterate=2
+ defaultdistance="ktuples"
+fi
+outputfile=""
+namelength=-1
+anysymbol=0
+parallelizationstrategy="BAATARI2"
+kappa=$defaultkappa
+sbstmodel=$defaultsbstmodel
+fmodel=$defaultfmodel
+nmodel=" "
+gop=$defaultgop
+gopdist=$defaultgop
+aof=$defaultaof
+cycle=$defaultcycle
+iterate=$defaultiterate
+fft=$defaultfft
+rough=$defaultrough
+distance=$defaultdistance
+forcefft=0
+memopt=" "
+weightopt=" "
+GGOP="-6.00"
+LGOP="-6.00"
+LEXP="-0.000"
+GEXP="-0.000"
+lgop=$defaultlgop
+lexp="-0.100"
+laof=$defaultlaof
+pggop="-2.00"
+pgexp="-0.10"
+pgaof="0.10"
+rgop="-1.530"
+rgep="-0.000"
+seqtype=" "
+weighti=$defaultweighti
+weightr=$defaultweightr
+weightm=$defaultweightm
+rnaalifold=0
+dafs=$defaultdafs
+mccaskill=$defaultmccaskill
+contrafold=$defaultcontrafold
+progressfile="/dev/stderr"
+debug=0
+sw=0
+algopt=$defaultalgopt
+algoptit=$defaultalgoptit
+#algspecified=0
+pairspecified=0
+scorecalcopt=" "
+coreout=0
+corethr="0.5"
+corewin="100"
+coreext=" "
+outputformat="pir"
+f2clext="-N"
+outorder="input"
+seed="x"
+seedtable="x"
+auto=0
+groupsize=-1
+partsize=50
+partdist="ktuples"
+partorderopt=" -x "
+treeout=0
+distout=0
+treein=0
+topin=0
+treeinopt=" "
+seedfiles="/dev/null"
+seedtablefile="/dev/null"
+pdblist="/dev/null"
+ownlist="/dev/null"
+strdir="$PWD"
+aamatrix="/dev/null"
+treeinfile="/dev/null"
+rnascoremtx=" "
+laraparams="/dev/null"
+foldalignopt=" "
+treealg=" -X 0.1 "
+sueff="1.0"
+scoreoutarg=" "
+numthreads=0
+numthreadsit=-1
+numthreadstb=-1
+randomseed=0
+addfile="/dev/null"
+addarg0=" "
+addarg=" "
+addsinglearg=" "
+add2ndhalfarg=" "
+mapoutfile="/dev/null"
+fragment=0
+legacygapopt=" "
+mergetable="/dev/null"
+mergearg=" "
+seedoffset=0
+outnum=" "
+last_e=5000
+last_m=3
+last_subopt=" "
+last_once=" "
+adjustdirection=0
+tuplesize=6
+termgapopt=" -O "
+#termgapopt=" " # gap/gap ga kakenai node
+similarityoffset="0.0"
+unalignlevel="0.0"
+unalignspecified=0
+spfactor="100.0"
+shiftpenaltyspecified=0
+opdistspecified=0
+allowshift=0
+enrich=0
+enrichseq=0
+enrichstr=0
+seektarget=""
+fixthreshold="0.0"
+bunkatsuopt=" "
+npickup=0
+minimumweight="0.00001" # 2016/Mar
+usenaivepairscore=" "
+oldgenafparam=0
+sprigorous=0
+pileuporshuffle="l"
+initialramusage="20GB"
+focusarg=" "
+if [ $# -gt 0 ]; then
+ if [ "$1" = "--version" ]; then
+ echo "$version" 1>&2
+ exit 0;
+ elif [ "$1" = "--help" -o "$1" = "--info" ]; then
+ shift
+ er=1;
+ fi
+ while [ $# -gt 1 ];
+ do
+ if [ "$1" = "--auto" ]; then
+ auto=1
+ elif [ "$1" = "--anysymbol" ]; then
+ anysymbol=1
+ elif [ "$1" = "--preservecase" ]; then
+ anysymbol=1
+ elif [ "$1" = "--clustalout" ]; then
+ outputformat="clustal"
+ elif [ "$1" = "--phylipout" ]; then
+ outputformat="phylip"
+ elif [ "$1" = "--reorder" ]; then
+ outorder="aligned"
+ partorderopt=" "
+ elif [ "$1" = "--inputorder" ]; then
+ outorder="input"
+ partorderopt=" -x "
+ elif [ "$1" = "--unweight" ]; then
+ weightopt=" -u "
+ elif [ "$1" = "--termgappenalty" ]; then
+ termgapopt=" "
+ elif [ "$1" = "--alga" ]; then
+ algopt=" "
+ algoptit=" "
+# algspecified=1
+ elif [ "$1" = "--algq" ]; then
+ algopt=" -Q "
+ algoptit=" "
+ echo "" 1>&2
+ echo "--algq is no longer supported!" 1>&2
+ echo "" 1>&2
+ exit 1;
+# algspecified=1
+ elif [ "$1" = "--namelength" ]; then
+ shift
+ namelength=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify the length of name in clustal format output!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--groupsize" ]; then
+ shift
+ groupsize=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify groupsize!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--partsize" ]; then
+ shift
+ partsize=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify partsize!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--parttree" ]; then
+ distance="parttree"
+ partdist="ktuples"
+ elif [ "$1" = "--dpparttree" ]; then
+ distance="parttree"
+ partdist="localalign"
+ elif [ "$1" = "--fastaparttree" ]; then
+ distance="parttree"
+ partdist="fasta"
+ elif [ "$1" = "--treeout" ]; then
+ treeout=1
+ elif [ "$1" = "--distout" ]; then
+ distout=1
+ elif [ "$1" = "--fastswpair" ]; then
+ distance="fasta"
+ pairspecified=1
+ sw=1
+ elif [ "$1" = "--fastapair" ]; then
+ distance="fasta"
+ pairspecified=1
+ sw=0
+ elif [ "$1" = "--averagelinkage" ]; then
+ treealg=" -X 1.0 "
+ sueff="1.0"
+ elif [ "$1" = "--minimumlinkage" ]; then
+ treealg=" -X 0.0 "
+ sueff="0.0"
+ elif [ "$1" = "--mixedlinkage" ]; then
+ shift
+ sueff="$1"
+ treealg=" -X $1"
+ elif [ "$1" = "--noscore" ]; then
+ scorecalcopt=" -Z "
+ elif [ "$1" = "--6mermultipair" ]; then
+ distance="ktuplesmulti"
+ tuplesize=6
+ pairspecified=1
+ elif [ "$1" = "--10mermultipair" ]; then
+ distance="ktuplesmulti"
+ tuplesize=10
+ pairspecified=1
+ elif [ "$1" = "--6merpair" ]; then
+ distance="ktuples"
+ tuplesize=6
+ pairspecified=1
+ elif [ "$1" = "--10merpair" ]; then
+ distance="ktuples"
+ tuplesize=10
+ pairspecified=1
+ elif [ "$1" = "--blastpair" ]; then
+ distance="blast"
+ pairspecified=1
+ elif [ "$1" = "--lastmultipair" ]; then
+ distance="lastmulti"
+ pairspecified=1
+ elif [ "$1" = "--globalpair" ]; then
+ distance="global"
+ pairspecified=1
+ elif [ "$1" = "--shortlongpair" ]; then
+ distance="local"
+ usenaivepairscore="-Z"
+ laof=0.0 # addfull no tokini tsukawareru.
+ lexp=0.0 # addfull no tokini tsukawareru.
+ pgaof=0.0 # local nara iranai
+ pgexp=0.0 # local nara iranai
+ pairspecified=1
+ elif [ "$1" = "--longshortpair" ]; then
+ distance="local"
+ usenaivepairscore="-Z"
+ laof=0.0 # addfull no tokini tsukawareru.
+ lexp=0.0 # addfull no tokini tsukawareru.
+ pgaof=0.0 # local nara iranai
+ pgexp=0.0 # local nara iranai
+ pairspecified=1
+ elif [ "$1" = "--localpair" ]; then
+ distance="local"
+ pairspecified=1
+ elif [ "$1" = "--lastpair" ]; then
+ distance="last"
+ pairspecified=1
+ elif [ "$1" = "--multipair" ]; then
+ distance="multi"
+ pairspecified=1
+ elif [ "$1" = "--hybridpair" ]; then
+ distance="hybrid"
+ pairspecified=1
+ elif [ "$1" = "--scarnapair" ]; then
+ distance="scarna"
+ pairspecified=1
+ elif [ "$1" = "--dafspair" ]; then
+ distance="dafs"
+ pairspecified=1
+ elif [ "$1" = "--larapair" ]; then
+ distance="lara"
+ pairspecified=1
+ elif [ "$1" = "--slarapair" ]; then
+ distance="slara"
+ pairspecified=1
+ elif [ "$1" = "--foldalignpair" ]; then
+ distance="foldalignlocal"
+ pairspecified=1
+ elif [ "$1" = "--foldalignlocalpair" ]; then
+ distance="foldalignlocal"
+ pairspecified=1
+ elif [ "$1" = "--foldalignglobalpair" ]; then
+ distance="foldalignglobal"
+ pairspecified=1
+ elif [ "$1" = "--globalgenafpair" ]; then
+ distance="globalgenaf"
+ pairspecified=1
+ echo "" 1>&2
+ echo "--globalgenaf is no longer supported!" 1>&2
+ echo "" 1>&2
+ exit 1;
+ elif [ "$1" = "--localgenafpair" ]; then
+ distance="localgenaf"
+ pairspecified=1
+ elif [ "$1" = "--genafpair" ]; then
+ distance="localgenaf"
+ pairspecified=1
+ elif [ "$1" = "--oldgenafpair" ]; then
+ distance="localgenaf"
+ pairspecified=1
+ oldgenafparam=1
+ elif [ "$1" = "--memsave" ]; then
+ memopt=" -M -B " # -B (bunkatsunashi no riyu ga omoidasenai)
+ elif [ "$1" = "--nomemsave" ]; then
+ memopt=" -N "
+ elif [ "$1" = "--nuc" ]; then
+ seqtype=" -D "
+ elif [ "$1" = "--amino" ]; then
+ seqtype=" -P "
+ elif [ "$1" = "--fft" ]; then
+ fft=1
+ forcefft=1
+ elif [ "$1" = "--nofft" ]; then
+ fft=0
+ elif [ "$1" = "--quiet" ]; then
+ if [ $os = "mingw" ]; then
+ progressfile="nul"
+ else
+ progressfile="/dev/null"
+ fi
+ elif [ "$1" = "--debug" ]; then
+ debug=1
+ elif [ "$1" = "--coreext" ]; then
+ coreext=" -c "
+ elif [ "$1" = "--core" ]; then
+ coreout=1
+ elif [ "$1" = "--adjustdirection" ]; then
+ adjustdirection=1
+ elif [ "$1" = "--adjustdirectionaccurately" ]; then
+ adjustdirection=2
+ elif [ "$1" = "--progress" ]; then
+ shift
+ progressfile="$1"
+ if ! ( expr "$progressfile" : "\/" > /dev/null || expr "$progressfile" : "[A-Za-z]\:" > /dev/null ) ; then
+ echo "Specify a progress file name with the absolute path!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--out" ]; then
+ shift
+ outputfile="$1"
+ elif [ "$1" = "--thread" ]; then
+ shift
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads. Or, use --thread -1" 1>&2
+ exit
+ fi
+ numthreads=`expr "$1" - 0`
+ elif [ "$1" = "--threadtb" ]; then
+ shift
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads for the iterative step!" 1>&2
+ exit
+ fi
+ numthreadstb=`expr "$1" - 0`
+ elif [ "$1" = "--threadit" ]; then
+ shift
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads for the iterative step!" 1>&2
+ exit
+ fi
+ numthreadsit=`expr "$1" - 0`
+ elif [ "$1" = "--last_subopt" ]; then
+ last_subopt="-S"
+ elif [ "$1" = "--last_once" ]; then
+ last_once="-U"
+ elif [ "$1" = "--last_m" ]; then
+ shift
+ last_m=`expr "$1" - 0`
+ elif [ "$1" = "--last_e" ]; then
+ shift
+ last_e=`expr "$1" - 0`
+ elif [ "$1" = "--randomseed" ]; then
+ shift
+ randomseed=`expr "$1" - 0`
+ elif [ "$1" = "--bestfirst" ]; then
+ parallelizationstrategy="BESTFIRST"
+ elif [ "$1" = "--adhoc0" ]; then
+ parallelizationstrategy="BAATARI0"
+ elif [ "$1" = "--adhoc1" ]; then
+ parallelizationstrategy="BAATARI1"
+ elif [ "$1" = "--adhoc2" ]; then
+ parallelizationstrategy="BAATARI2"
+ elif [ "$1" = "--simplehillclimbing" ]; then
+ parallelizationstrategy="BAATARI2"
+ elif [ "$1" = "--scoreout" ]; then
+ scoreoutarg="-S -B"
+ elif [ "$1" = "--outnum" ]; then
+ outnum="-n"
+ elif [ "$1" = "--leavegappyregion" ]; then
+ legacygapopt="-L"
+ elif [ "$1" = "--legacygappenalty" ]; then
+ legacygapopt="-L"
+ elif [ "$1" = "--merge" ]; then
+ shift
+ mergetable="$1"
+ if [ ! -e "$mergetable" ]; then
+ echo "Cannot open $mergetable" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--addprofile" ]; then
+ shift
+ addarg0="-I"
+ addfile="$1"
+ elif [ "$1" = "--add" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ elif [ "$1" = "--addfragments" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=1
+ elif [ "$1" = "--addfull" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=-1
+ elif [ "$1" = "--addlong" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=-2
+ elif [ "$1" = "--smoothing" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -p "
+ elif [ "$1" = "--keeplength" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -Y "
+ elif [ "$1" = "--mapout" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -Z -Y "
+ elif [ "$1" = "--mapoutfile" ]; then
+ shift
+ add2ndhalfarg=$add2ndhalfarg" -Z -Y "
+ mapoutfile="$1"
+ elif [ "$1" = "--maxiterate" ]; then
+ shift
+ iterate=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify the number of iterations!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--retree" ]; then
+ shift
+ cycle=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify the number of tree rebuilding!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--text" ]; then
+ sbstmodel=" -b -2 -a "
+ f2clext="-E"
+ seqtype="-P"
+ fft=0
+ elif [ "$1" = "--aamatrix" ]; then
+ shift
+ sbstmodel=" -b -1 "
+ aamatrix="$1"
+ if [ ! -e "$aamatrix" ]; then
+ echo "Cannot open $aamatrix" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--treein" ]; then
+ shift
+ treeinopt=" -U "
+ treein=1
+ treeinfile="$1"
+ if [ ! -e "$treeinfile" ]; then
+ echo "Cannot open $treeinfile" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--pileup" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="p"
+ elif [ "$1" = "--randomchain" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="s"
+ elif [ "$1" = "--topin" ]; then
+ shift
+ treeinopt=" -V "
+ treein=1
+ treeinfile="$1"
+ echo "The --topin option has been disabled." 1>&2
+ echo "There was a bug in version < 6.530." 1>&2
+ echo "This bug has not yet been fixed." 1>&2
+ exit 1
+ elif [ "$1" = "--memsavetree" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="C"
+ elif [ "$1" = "--memsavetreex" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="c"
+ elif [ "$1" = "--initialramusage" ]; then
+ shift
+ treeinopt=" -U "
+ treein=1
+ initialramusage="$1"
+ pileuporshuffle="c"
+ elif [ "$1" = "--kappa" ]; then
+ shift
+ kappa=" -k $1 "
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify kappa value!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--fmodel" ]; then
+ fmodel=" -a "
+ elif [ "$1" = "--nwildcard" ]; then
+ nmodel=" -: "
+ elif [ "$1" = "--nzero" ]; then
+ nmodel=" "
+ elif [ "$1" = "--jtt" ]; then
+ shift
+ sbstmodel=" -j $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
+ elif [ "$1" = "--kimura" ]; then
+ shift
+ sbstmodel=" -j $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
+ elif [ "$1" = "--tm" ]; then
+ shift
+ sbstmodel=" -m $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
+ elif [ "$1" = "--bl" ]; then
+ shift
+ sbstmodel=" -b $1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "blosum $1?" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--weighti" ]; then
+ shift
+ weighti="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weighti value!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--weightr" ]; then
+ shift
+ weightr="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weightr value!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--weightm" ]; then
+ shift
+ weightm="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weightm value!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--rnaalifold" ]; then
+ rnaalifold=1
+ elif [ "$1" = "--mccaskill" ]; then
+ mccaskill=1
+ contrafold=0
+ dafs=0
+ elif [ "$1" = "--contrafold" ]; then
+ mccaskill=0
+ contrafold=1
+ dafs=0
+ elif [ "$1" = "--dafs" ]; then
+ mccaskill=0
+ contrafold=0
+ dafs=1
+ elif [ "$1" = "--ribosum" ]; then
+ rnascoremtx=" -s "
+ elif [ "$1" = "--op" ]; then
+ shift
+ gop="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify op!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--opdist" ]; then
+ shift
+ gopdist="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify opdist!" 1>&2
+ exit
+ fi
+ opdistspecified=1
+ elif [ "$1" = "--allowshift" ]; then
+ allowshift=1
+ elif [ "$1" = "--shiftpenalty" ]; then
+ shift
+ spfactor="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify sf!" 1>&2
+ exit
+ fi
+ shiftpenaltyspecified=1
+ elif [ "$1" = "--ep" ]; then
+ shift
+# aof="$1"
+ tmpval="$1"
+ aof=`awk "BEGIN{ print -1.0 * \"$tmpval\"}"`
+ if ! expr "$aof" : "[0-9\-]" > /dev/null ; then
+ printf "\nSpecify a number for ep, like --ep 0.1\n" 1>&2
+ printf "'$1' cannot be interpreted as a number..\n\n" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--rop" ]; then
+ shift
+ rgop="$1"
+# Atode check
+ elif [ "$1" = "--rep" ]; then
+ shift
+ rgep="$1"
+ elif [ "$1" = "--lop" ]; then
+ shift
+ lgop="$1"
+ elif [ "$1" = "--LOP" ]; then
+ shift
+ LGOP="$1"
+ elif [ "$1" = "--lep" ]; then
+ shift
+ laof="$1"
+ elif [ "$1" = "--lexp" ]; then
+ shift
+ lexp="$1"
+ elif [ "$1" = "--LEXP" ]; then
+ shift
+ LEXP="$1"
+ elif [ "$1" = "--GEXP" ]; then
+ shift
+ GEXP="$1"
+ elif [ "$1" = "--GOP" ]; then
+ shift
+ GGOP="$1"
+ elif [ "$1" = "--gop" ]; then
+ shift
+ pggop="$1"
+ elif [ "$1" = "--gep" ]; then
+ shift
+ pgaof="$1"
+ elif [ "$1" = "--gexp" ]; then
+ shift
+ pgexp="$1"
+ elif [ "$1" = "--laraparams" ]; then
+ shift
+ laraparams="$1"
+ elif [ "$1" = "--corethr" ]; then
+ shift
+ corethr="$1"
+ elif [ "$1" = "--corewin" ]; then
+ shift
+ corewin="$1"
+ elif [ "$1" = "--strdir" ]; then
+ shift
+ strdir="$1"
+ elif [ "$1" = "--pdbidlist" ]; then
+ shift
+ pdblist="$1"
+ if [ ! -e "$pdblist" ]; then
+ echo "Cannot open $pdblist" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--pdbfilelist" ]; then
+ shift
+ ownlist="$1"
+ if [ ! -e "$ownlist" ]; then
+ echo "Cannot open $ownlist" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--enrich" ]; then
+ enrich=1
+ enrichseq=1
+ enrichstr=1
+ seektarget=""
+ elif [ "$1" = "--enrichseq" ]; then
+ enrich=1
+ enrichseq=1
+ enrichstr=0
+ seektarget="-seq"
+ elif [ "$1" = "--enrichstr" ]; then
+ enrich=1
+ enrichseq=0
+ enrichstr=1
+ seektarget="-str"
+ elif [ "$1" = "--seedtable" ]; then
+ shift
+ seedtable="y"
+ seedtablefile="$1"
+ elif [ "$1" = "--seed" ]; then
+ shift
+ seed="m"
+ seedfiles="$seedfiles $1"
+ elif [ "$1" = "--minimumweight" ]; then
+ shift
+ minimumweight="$1"
+ elif [ "$1" = "--similaritylevel" ]; then
+ shift
+ similarityoffset="$1"
+ elif [ "$1" = "--unalignlevel" ]; then
+ shift
+ unalignlevel="$1"
+ unalignspecified=1
+ elif [ "$1" = "--skipiterate" ]; then
+ shift
+ fixthreshold="$1"
+ elif [ "$1" = "--bunkatsunashi" ]; then
+ bunkatsuopt=" -B "
+ elif [ "$1" = "--sp" ]; then
+ sprigorous=1
+ elif [ "$1" = "--focus" ]; then
+ focusarg=" -= "
+ elif [ "$1" = "--sparsepickup" ]; then
+ shift
+ npickup="$1"
+ elif [ $progname = "fftns" -o $progname = "nwns" ]; then
+ if [ "$1" -gt 0 ]; then
+ cycle=`expr "$1" - 0`
+ fi
+ else
+ echo "Unknown option: $1" 1>&2
+ er=1;
+# exit 1;
+ fi
+ shift
+ done;
+
+
+ echo "" 1>"$progressfile"
+
+# TMPFILE=/tmp/$progname.$$
+ TMPFILE=`mktemp -dt $progname.XXXXXXXXXX`
+ if [ $? -ne 0 ]; then
+ echo "mktemp seems to be obsolete. Re-trying without -t" 1>&2
+ TMPFILE=`mktemp -d /tmp/$progname.XXXXXXXXXX`
+ fi
+
+# if [ $os = "cygwin" ]; then
+# TMPFILE=`cygpath -w $TMPFILE` unnecessary for cygwin2.7
+# fi
+
+ umask 077
+# mkdir $TMPFILE || er=1
+ if [ $debug -eq 1 ]; then
+# trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 # does not work in msys
+ trap "tar cfv - $TMPFILE | gzip -c > debuginfo.tgz; rm -rf $TMPFILE " 0
+ else
+ trap "rm -rf $TMPFILE" 0
+ fi
+ if [ $# -eq 1 ]; then
+ if [ -r "$1" -o "$1" = - ]; then
+
+ if [ -r "$addfile" ]; then
+ printf '';
+ else
+ echo "$0": Cannot open "$addfile". 1>&2
+ echo "" 1>&2
+ exit 1;
+ fi
+
+ cat "$1" | tr "\r" "\n" > $TMPFILE/infile
+ echo "" >> $TMPFILE/infile
+ cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> $TMPFILE/infile
+ cat "$addfile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_addfile
+ cat "$aamatrix" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_aamtx
+ cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_subalignmentstable
+ cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_guidetree
+ cat "$seedtablefile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_seedtablefile
+ cat "$laraparams" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_lara.params
+ cat "$pdblist" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/pdblist
+ cat "$ownlist" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/ownlist
+
+# echo $seedfiles
+ infilename="$1"
+ seedfilesintmp="/dev/null"
+ seednseq="0"
+ set $seedfiles > /dev/null
+ while [ $# -gt 1 ];
+ do
+ shift
+ if [ -r "$1" ]; then
+ cat "$1" | tr "\r" "\n" > $TMPFILE/seed$#
+ else
+ echo "$0": Cannot open "$1". 1>&2
+ echo "" 1>&2
+ exit 1;
+ fi
+ seednseq=$seednseq" "`grep -c '^[>|=]' $TMPFILE/seed$#`
+ seedfilesintmp=$seedfilesintmp" "seed$#
+ done
+# ls $TMPFILE
+# echo $seedfilesintmp
+# echo $seednseq
+
+
+ else
+ echo "$0": Cannot open "$1". 1>&2
+ echo "" 1>&2
+ er=1
+# exit 1;
+ fi
+ else
+# echo '$#'"=$#" 1>&2
+ er=1
+ fi
+
+
+
+ if [ $numthreads -lt 0 ]; then
+ if [ $os = "linux" ]; then
+ nlogicalcore=`cat /proc/cpuinfo | grep "^processor" | uniq | wc -l`
+ ncoresinacpu=`cat /proc/cpuinfo | grep 'cpu cores' | uniq | awk '{print $4}'`
+ nphysicalcpu=`cat /proc/cpuinfo | grep 'physical id' | sort | uniq | wc -l`
+ if [ $nlogicalcore -eq 0 ]; then
+ echo "Cannot get the number of processors from /proc/cpuinfo" 1>>"$progressfile"
+ exit 1
+ fi
+ if [ ${#ncoresinacpu} -gt 0 -a $nphysicalcpu -gt 0 ]; then
+ numthreads=`expr $ncoresinacpu '*' $nphysicalcpu`
+# if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading
+# numthreads=`expr $numthreads '+' 1`
+# fi
+ else
+ numthreads=$nlogicalcore
+ fi
+ elif [ $os = "darwin" ]; then
+ numthreads=`sysctl -n hw.physicalcpu`
+ if [ -z $numthreads ]; then
+ echo "Cannot get the number of physical cores from sysctl" 1>>"$progressfile"
+ exit 1
+ fi
+# nlogicalcore=`sysctl -n hw.logicalcpu`
+# if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading
+# numthreads=`expr $numthreads '+' 1`
+# fi
+ elif [ $os = "mingw" -o $os = "cygwin" ]; then
+ numthreads=`wmic cpu get NumberOfCores | head -2 | tail -1 | awk '{print $1}'`
+ else
+ echo "Cannot count the number of physical cores." 1>>"$progressfile"
+ exit 1
+ fi
+ echo "OS = "$os 1>>"$progressfile"
+ echo "The number of physical cores = " $numthreads 1>>"$progressfile"
+ fi
+
+ if [ $numthreadstb -lt 0 ]; then
+ numthreadstb=$numthreads
+ fi
+
+ if [ $numthreadsit -lt 0 ]; then
+ if [ $numthreads -lt 11 ]; then
+ numthreadsit=$numthreads
+ else
+ numthreadsit=10
+ fi
+ fi
+
+ if [ $numthreadsit -eq 0 -a $parallelizationstrategy = "BESTFIRST" ]; then
+ echo 'Impossible' 1>&2;
+ exit 1;
+ fi
+
+ if [ "$addarg0" != " " ]; then
+ iterate=0 # 2013/03/23
+ "$prefix/countlen" < $TMPFILE/_addfile > $TMPFILE/addsize 2>>"$progressfile"
+ nadd=`awk '{print $1}' $TMPFILE/addsize`
+ if [ $nadd -eq "0" ]; then
+ echo Check $addfile 1>&2
+ exit 1;
+ fi
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ echo 'Impossible' 1>&2;
+ echo 'Use either ONE of --seed, --seedtable, --addprofile and --add.' 1>&2
+ exit 1;
+ fi
+ else
+ nadd="0"
+ fi
+
+ if [ $auto -eq 1 ]; then
+ "$prefix/countlen" < $TMPFILE/infile > $TMPFILE/size 2>>"$progressfile"
+ nseq=`awk '{print $1}' $TMPFILE/size`
+ nlen=`awk '{print $3}' $TMPFILE/size`
+
+ if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then
+ distance="local"
+ iterate=1000
+ cycle=1
+ elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then
+ distance="local"
+ iterate=2
+ cycle=1
+ elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then
+ distance="ktuples"
+ iterate=2
+ cycle=2
+ elif [ $nseq -lt 50000 ]; then # changed from 10000 2014/Oct/4
+ distance="ktuples"
+ iterate=0
+ cycle=2
+ elif [ $nseq -lt 90000 ]; then # changed from 30000 2014/Oct/4
+ distance="ktuples"
+ iterate=0
+ cycle=1
+ elif [ $nlen -lt 3000 ]; then
+ distance="parttree"
+ partdist="localalign"
+ algopt=" "
+ algoptit=" "
+# algspecified=1
+ cycle=1
+ else
+ distance="parttree"
+ partdist="ktuples"
+ algopt=" "
+ algoptit=" "
+# algspecified=1
+ cycle=1
+ fi
+
+
+# if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then
+# distance="local"
+# iterate=1000
+# cycle=1
+# elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then
+# distance="local"
+# iterate=2
+# cycle=1
+# elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then
+# distance="ktuples"
+# iterate=2
+# cycle=2
+# elif [ $nseq -lt 200000 ]; then
+# distance="ktuples"
+# iterate=0
+# treeinopt=" -U "
+# treein=1
+# pileuporshuffle="a"
+# elif [ $nlen -lt 3000 ]; then
+# distance="parttree"
+# partdist="localalign"
+# algopt=" "
+# algoptit=" "
+## algspecified=1
+# cycle=1
+# else
+# distance="parttree"
+# partdist="ktuples"
+# algopt=" "
+# algoptit=" "
+## algspecified=1
+# cycle=1
+# fi
+
+
+ if [ $fragment -ne 0 ]; then
+ norg=`expr $nseq '-' $nadd`
+ npair=`expr $norg '*' $nadd`
+ echo "nadd = " $nadd 1>>"$progressfile"
+ echo "npair = " $npair 1>>"$progressfile"
+ echo "nseq = " $nseq 1>>"$progressfile"
+ echo "nlen = " $nlen 1>>"$progressfile"
+# nagasa check!
+#
+ if [ $npair -gt 10000000 -o $nlen -gt 500000 ]; then # 2015/Jun
+ distance="ktuples"
+ echo "use ktuples, size=$tuplesize!" 1>>"$progressfile"
+ elif [ $npair -gt 3000000 -o $nlen -gt 100000 ]; then # 2015/Jun
+ distance="multi"
+ weighti="0.0"
+ echo "use multipair, weighti=0.0!" 1>>"$progressfile"
+ else
+ distance="multi"
+ echo "use multipair, weighti=$weighti!" 1>>"$progressfile"
+ fi
+ pairspecified=1
+ fi
+ fi
+
+ if [ `awk "BEGIN {print( 0.0+\"$sueff\" < 0.0 || 0.0+\"$sueff\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The argument of --mixedlinkage must be between 0.0 and 1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+
+ if [ $allowshift -eq 1 ]; then
+ if [ $unalignspecified -ne 1 ]; then
+ unalignlevel="0.8"
+ fi
+ if [ $shiftpenaltyspecified -ne 1 ]; then
+ spfactor="2.00"
+ fi
+ fi
+
+ if [ $opdistspecified -ne 1 ]; then
+ gopdist=$gop
+ fi
+
+# Shift/unalign mode: entered when unalignlevel is not the literal string "0.0"
+# or spfactor is numerically below 100.0. It resets nmodel and termgapopt and
+# then exits 1 for the combinations the messages below declare unsupported.
+ if [ $unalignlevel != "0.0" -o `awk "BEGIN {print( 0.0+\"$spfactor\" < 100.0 )}"` -gt 0 ]; then
+ nmodel=" -: "
+ termgapopt=" "
+ if [ $distance = "localgenaf" ]; then
+ printf "\n%s\n" "The combination of --allowshift and --genafpair (E-INS-i/-1) is not supported." 1>>"$progressfile"
+ printf "%s\n" "Instead, please try --allowshift --globalpair (G-INS-i/-1 in the web version)," 1>>"$progressfile"
+ printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i/-1), too." 1>>"$progressfile"
+ exit 1;
+ fi
+# Only distance "global" with weighti >= 1.0 passes this check.
+ if [ $distance != "global" -o `awk "BEGIN {print( 0.0+\"$weighti\" < 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is supported only with the --globalpair option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $fragment -ne 0 ]; then
+ printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is not supported with the --addfragments option." 1>>"$progressfile"
+ exit 1;
+ fi
+ fi
+
+# Reject spfactor values numerically below 1.0.
+# NOTE(review): the message says ">1" but the test accepts exactly 1.0 -- confirm which is intended.
+ if [ `awk "BEGIN {print( 0.0+\"$spfactor\" < 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n" "shiftpenalty must be >1." 1>>"$progressfile"
+ exit 1;
+ fi
+
+# Reject a negative fixthreshold.
+ if [ `awk "BEGIN {print( 0.0+\"$fixthreshold\" < 0.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The 'fix' parameter must be >= 0.0" 1>>"$progressfile"
+ exit 1;
+ fi
+
+# Reject unalignlevel values outside [0.0, 1.0].
+ if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" < 0.0 || 0.0+\"$unalignlevel\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The 'unalignlevel' parameter must be between 0.0 and 1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+# 'unalignlevel > 0' mode: set the offset/extension variables below to "0",
+# clear termgapopt, and exit 1 for --addfragments, parttree and genafpair,
+# which the messages declare unsupported in this mode.
+ if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" > 0.0 )}"` -gt 0 ]; then
+ laof="0"
+ lexp="0"
+ pgaof="0"
+ pgexp="0"
+ LEXP="0"
+ GEXP="0"
+ termgapopt=" "
+# if [ $auto -eq 1 -o $fragment -ne 0 -o $iterate -gt 0 ]; then
+ if [ $fragment -ne 0 ]; then
+ printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported with the --addfragments option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $distance = "parttree" ]; then
+ printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported in the (dp)parttree option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $distance = "localgenaf" ]; then
+ printf "\n%s\n" "The --genafpair is not supported in the 'unalignlevel > 0' mode." 1>>"$progressfile"
+ printf "%s\n" "Instead, please try --unalignlevel xx --globalpair," 1>>"$progressfile"
+ printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i), too." 1>>"$progressfile"
+ exit 1;
+ fi
+# if [ $distance != "ktuples" -a `awk "BEGIN {print( 0.0+\"$weighti\" > 0.0 )}"` -gt 0 -a $iterate -gt 0 ]; then
+# printf "\n%s\n\n" "Please add --weighti 0.0, for now." 1>>"$progressfile"
+# exit 1;
+# fi
+ fi
+
+# similarityoffset and unalignlevel may not both be non-zero.
+ if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" != 0.0 && 0.0+\"$unalignlevel\" != 0.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "Do not simultaneously specify --similaritylevel and --unalignlevel" 1>>"$progressfile"
+ exit 1;
+ fi
+
+# Range-check similarityoffset ([-1.0, +1.0]), then add it to aof, laof and
+# pgaof; awk performs the floating-point sums.
+ if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" < -1.0 || 0.0+\"$similarityoffset\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "Similarity must be between -1.0 and +1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+ aof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $aof}"`
+ laof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $laof}"`
+ pgaof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $pgaof}"`
+
+
+# Cap the number of refinement iterations: 254 for the BESTFIRST / BAATARI0
+# parallelization strategies, 16 for everything else.
+ iteratelimit=16
+ if [ $parallelizationstrategy = "BESTFIRST" -o $parallelizationstrategy = "BAATARI0" ]; then
+   iteratelimit=254
+ fi
+ if [ $iterate -gt $iteratelimit ]; then iterate=$iteratelimit; fi #??
+
+# Build the RNA option strings. Start from the no-structure defaults and
+# override them when one of the structure-aware modes is switched on.
+ rnaopt=" "
+ rnaoptit=" -F "
+ if [ $rnaalifold -eq 1 ]; then
+   rnaopt=" -e $rgep -o $rgop -c $weightm -r $weightr -R $rnascoremtx "
+# rnaoptit=" -o $rgop -BT -c $weightm -r $weightr -R "
+   rnaoptit=" -o $rgop -F -c $weightm -r $weightr -R "
+ elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $contrafold -eq 1 ]; then
+   rnaopt=" -o $rgop -c $weightm -r $weightr "
+# rnaoptit=" -e $rgep -o $rgop -BT -c $weightm -r $weightr $rnascoremtx "
+   rnaoptit=" -e $rgep -o $rgop -F -c $weightm -r $weightr $rnascoremtx "
+ fi
+
+# if [ $algspecified -eq 0 ]; then
+# if [ $distance = "parttree" ]; then
+# algopt=" -Q "
+# algoptit=" "
+# else
+# algopt=" "
+# algoptit=" "
+# fi
+# fi
+
+# sprigorous=1 (the messages call this --sp): switch algopt to " -@ ",
+# clear termgapopt, turn fft off and set memopt. Iterative refinement is
+# accepted only when numthreadsit is 0; otherwise explain and exit 1.
+ if [ $sprigorous -eq 1 ]; then
+ algopt=" -@ "
+ if [ $iterate -gt 0 ]; then
+ if [ $numthreadsit -eq 0 ]; then
+ algoptit=" -@ -B -Z -z 1000 "
+ else
+ echo "" 1>>"$progressfile"
+ echo "At present, the combination of --sp and iterative refinement is supported only in a single thread." 1>>"$progressfile"
+ echo "Please try \"--thread -1 --threadit 0\", which runs the iterative refinment calculation on a single thread." 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ exit 1;
+# algoptit=" -@ -B -z 1000 "
+ fi
+ fi
+ termgapopt=" "
+ fft=0
+ memopt=" -N "
+ fi
+
+# Concatenate the substitution-model related options into one string.
+ model="$sbstmodel $kappa $fmodel $nmodel"
+
+# When er is 1, print the short usage summary to stderr and exit with status 1.
+# The commented-out echo lines are older, longer versions of the help text.
+ if [ $er -eq 1 ]; then
+ echo "------------------------------------------------------------------------------" 1>&2
+ echo " MAFFT" $version 1>&2
+# echo "" 1>&2
+# echo " Input format: fasta" 1>&2
+# echo "" 1>&2
+# echo " Usage: `basename $0` [options] inputfile > outputfile" 1>&2
+ echo " http://mafft.cbrc.jp/alignment/software/" 1>&2
+ echo " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)" 1>&2
+# echo "------------------------------------------------------------------------------" 1>&2
+# echo " % mafft in > out" 1>&2
+ echo "------------------------------------------------------------------------------" 1>&2
+# echo "" 1>&2
+ echo "High speed:" 1>&2
+ echo " % mafft in > out" 1>&2
+ echo " % mafft --retree 1 in > out (fast)" 1>&2
+ echo "" 1>&2
+ echo "High accuracy (for <~200 sequences x <~2,000 aa/nt):" 1>&2
+ echo " % mafft --maxiterate 1000 --localpair in > out (% linsi in > out is also ok)" 1>&2
+ echo " % mafft --maxiterate 1000 --genafpair in > out (% einsi in > out)" 1>&2
+ echo " % mafft --maxiterate 1000 --globalpair in > out (% ginsi in > out)" 1>&2
+ echo "" 1>&2
+ echo "If unsure which option to use:" 1>&2
+ echo " % mafft --auto in > out" 1>&2
+ echo "" 1>&2
+# echo "Other options:" 1>&2
+ echo "--op # : Gap opening penalty, default: 1.53" 1>&2
+ echo "--ep # : Offset (works like gap extension penalty), default: 0.0" 1>&2
+ echo "--maxiterate # : Maximum number of iterative refinement, default: 0" 1>&2
+ echo "--clustalout : Output: clustal format, default: fasta" 1>&2
+ echo "--reorder : Outorder: aligned, default: input order" 1>&2
+ echo "--quiet : Do not report progress" 1>&2
+ echo "--thread # : Number of threads (if unsure, --thread -1)" 1>&2
+# echo "" 1>&2
+# echo " % mafft --maxiterate 1000 --localpair in > out (L-INS-i)" 1>&2
+# echo " most accurate in many cases, assumes only one alignable domain" 1>&2
+# echo "" 1>&2
+# echo " % mafft --maxiterate 1000 --genafpair in > out (E-INS-i)" 1>&2
+# echo " works well if many unalignable residues exist between alignable domains" 1>&2
+# echo "" 1>&2
+# echo " % mafft --maxiterate 1000 --globalpair in > out (G-INS-i)" 1>&2
+# echo " suitable for globally alignable sequences " 1>&2
+# echo "" 1>&2
+# echo " % mafft --maxiterate 1000 in > out (FFT-NS-i)" 1>&2
+# echo " accurate and slow, iterative refinement method " 1>&2
+# echo "" 1>&2
+# echo "If the input sequences are long (~1,000,000nt)," 1>&2
+# echo " % mafft --retree 1 --memsave --fft in > out (FFT-NS-1-memsave, new in v5.8)" 1>&2
+# echo "" 1>&2
+# echo "If many (~5,000) sequences are to be aligned," 1>&2
+# echo "" 1>&2
+# echo " % mafft --retree 1 [--memsave] --nofft in > out (NW-NS-1, new in v5.8)" 1>&2
+# echo "" 1>&2
+# echo " --localpair : All pairwise local alignment information is included" 1>&2
+# echo " to the objective function, default: off" 1>&2
+# echo " --globalpair : All pairwise global alignment information is included" 1>&2
+# echo " to the objective function, default: off" 1>&2
+# echo " --op # : Gap opening penalty, default: $defaultgop " 1>&2
+# echo " --ep # : Offset (works like gap extension penalty), default: $defaultaof " 1>&2
+# echo " --bl #, --jtt # : Scoring matrix, default: BLOSUM62" 1>&2
+# echo " Alternatives are BLOSUM (--bl) 30, 45, 62, 80, " 1>&2
+# echo " or JTT (--jtt) # PAM. " 1>&2
+# echo " --nuc or --amino : Sequence type, default: auto" 1>&2
+# echo " --retree # : The number of tree building in progressive method " 1>&2
+# echo " (see the paper for detail), default: $defaultcycle " 1>&2
+# echo " --maxiterate # : Maximum number of iterative refinement, default: $defaultiterate " 1>&2
+# if [ $defaultfft -eq 1 ]; then
+# echo " --fft or --nofft: FFT is enabled or disabled, default: enabled" 1>&2
+# else
+# echo " --fft or --nofft: FFT is enabled or disabled, default: disabled" 1>&2
+# fi
+# echo " --memsave: Memory saving mode" 1>&2
+# echo " (for long genomic sequences), default: off" 1>&2
+# echo " --clustalout : Output: clustal format, default: fasta" 1>&2
+# echo " --reorder : Outorder: aligned, default: input order" 1>&2
+# echo " --quiet : Do not report progress" 1>&2
+# echo "-----------------------------------------------------------------------------" 1>&2
+ exit 1;
+ fi
+# swopt is " -A " when sw is 1 and a single blank otherwise.
+ swopt=" "
+ if [ $sw -eq 1 ]; then
+   swopt=" -A "
+ fi
+
+# The "fasta" distance modes need an executable fasta34. When FASTA_4_MAFFT
+# is empty it is looked up with `which fasta34`; if the result is not
+# executable, print installation hints to stderr and exit 1.
+# NOTE(review): the message still carries the placeholder "version x.xx".
+ if [ $distance = "fasta" -o $partdist = "fasta" ]; then
+ if [ ! "$FASTA_4_MAFFT" ]; then
+ FASTA_4_MAFFT=`which fasta34`
+ fi
+
+ if [ ! -x "$FASTA_4_MAFFT" ]; then
+ echo "" 1>&2
+ echo "== Install FASTA ========================================================" 1>&2
+ echo "This option requires the fasta34 program (FASTA version x.xx or higher)" 1>&2
+ echo "installed in your PATH. If you have the fasta34 program but have renamed" 1>&2
+ echo "(like /usr/local/bin/myfasta), set the FASTA_4_MAFFT environment variable" 1>&2
+ echo "to point your fasta34 (like setenv FASTA_4_MAFFT /usr/local/bin/myfasta)." 1>&2
+ echo "=========================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ fi
+# The "last" / "lastmulti" modes need executable lastal and lastdb under $prefix.
+# NOTE(review): the second URL in the message is a placeholder (xxxxxxx.html).
+ if [ $distance = "last" -o $distance = "lastmulti" ]; then
+ if [ ! -x "$prefix/lastal" -o ! -x "$prefix/lastdb" ]; then
+ echo "" 1>&2
+ echo "== Install LAST ============================================================" 1>&2
+ echo "LAST (Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487) is required." 1>&2
+ echo "http://last.cbrc.jp/" 1>&2
+ echo "http://mafft.cbrc.jp/alignment/software/xxxxxxx.html " 1>&2
+ echo "============================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ fi
+# The "lara" / "slara" modes need an executable $prefix/mafft_lara and a
+# non-empty parameter file in $laraparams; either one missing exits with 1.
+ if [ $distance = "lara" -o $distance = "slara" ]; then
+ if [ ! -x "$prefix/mafft_lara" ]; then
+ echo "" 1>&2
+ echo "== Install LaRA =========================================================" 1>&2
+ echo "This option requires LaRA (Bauer et al. http://www.planet-lisa.net/)." 1>&2
+ echo "The executable have to be renamed to 'mafft_lara' and installed into " 1>&2
+ echo "the $prefix directory. " 1>&2
+ echo "A configuration file of LaRA also have to be given" 1>&2
+ echo "mafft-xinsi --larapair --laraparams parameter_file" 1>&2
+ echo "mafft-xinsi --slarapair --laraparams parameter_file" 1>&2
+ echo "=========================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+# -s: the parameter file must exist and be non-empty.
+ if [ ! -s "$laraparams" ]; then
+ echo "" 1>&2
+ echo "== Configure LaRA =======================================================" 1>&2
+ echo "A configuration file of LaRA have to be given" 1>&2
+ echo "mafft-xinsi --larapair --laraparams parameter_file" 1>&2
+ echo "mafft-xinsi --slarapair --laraparams parameter_file" 1>&2
+ echo "=========================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ fi
+# The foldalign distance modes need an executable $prefix/foldalign210.
+ if [ $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then
+ if [ ! -x "$prefix/foldalign210" ]; then
+ echo "" 1>&2
+ echo "== Install FOLDALIGN ====================================================" 1>&2
+ echo "This option requires FOLDALIGN (Havgaard et al. http://foldalign.ku.dk/)." 1>&2
+ echo "The executable have to be renamed to 'foldalign210' and installed into " 1>&2
+ echo "the $prefix directory. " 1>&2
+ echo "=========================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ fi
+# The "scarna" distance and the mccaskill option need an executable $prefix/mxscarnamod.
+ if [ $distance = "scarna" -o $mccaskill -eq 1 ]; then
+ if [ ! -x "$prefix/mxscarnamod" ]; then
+ echo "" 1>&2
+ echo "== Install MXSCARNA ======================================================" 1>&2
+ echo "MXSCARNA (Tabei et al. BMC Bioinformatics 2008 9:33) is required." 1>&2
+ echo "Please 'make' at the 'extensions' directory of the MAFFT source package," 1>&2
+ echo "which contains the modified version of MXSCARNA." 1>&2
+ echo "http://mafft.cbrc.jp/alignment/software/source.html " 1>&2
+ echo "==========================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ fi
+# The "dafs" distance and the dafs option need an executable $prefix/dafs.
+# NOTE(review): the citation in the message is a placeholder ("Journal 2012 issue:page").
+ if [ $distance = "dafs" -o $dafs -eq 1 ]; then
+ if [ ! -x "$prefix/dafs" ]; then
+ echo "" 1>&2
+ echo "== Install DAFS===========================================================" 1>&2
+ echo "DAFS (Sato et al. Journal 2012 issue:page) is required." 1>&2
+ echo "http://www.ncrna.org/ " 1>&2
+ echo "==========================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ fi
+# The contrafold option needs an executable $prefix/contrafold.
+ if [ $contrafold -eq 1 ]; then
+ if [ ! -x "$prefix/contrafold" ]; then
+ echo "" 1>&2
+ echo "== Install CONTRAfold ===================================================" 1>&2
+ echo "This option requires CONTRAfold" 1>&2
+ echo "(Do et al. http://contra.stanford.edu/contrafold/)." 1>&2
+ echo "The executable 'contrafold' have to be installed into " 1>&2
+ echo "the $prefix directory. " 1>&2
+ echo "=========================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ fi
+
+#old
+# if [ $treeout -eq 1 ]; then
+# parttreeoutopt="-t"
+# if [ $cycle -eq 0 ]; then
+# treeoutopt="-t -T"
+# groupsize=1
+# iterate=0
+# if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
+# distance="distonly"
+# fi
+# else
+# treeoutopt="-t"
+# fi
+# else
+# parttreeoutopt=" "
+# if [ $cycle -eq 0 ]; then
+# treeoutopt="-t -T"
+# iterate=0
+# if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
+# distance="distonly"
+# fi
+# else
+# treeoutopt=" "
+# fi
+# fi
+
+#new
+# Derive treeoutopt / parttreeoutopt / distoutopt from cycle, treeout and distout.
+# cycle=0 additionally forces iterate=0 and weighti="0.0".
+ if [ $cycle -eq 0 ]; then
+ treeoutopt="-t -T"
+ iterate=0
+ weighti="0.0" # 2016Jul31, moved from tbfast.c
+# if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # 2012/04, localpair --> local alignment distance
+# if [ $distance = "global" ]; then
+# distance="distonly"
+# fi
+ if [ $treeout -eq 1 ]; then
+ parttreeoutopt="-t"
+ groupsize=1
+ else
+ parttreeoutopt=" "
+ fi
+ if [ $distout -eq 1 ]; then
+ distoutopt="-y -T"
+# With distout but without treeout, the "-t -T" set above is dropped.
+ if [ $treeout -eq 0 ]; then
+ treeoutopt=""
+ fi
+ fi
+ else
+ if [ $treeout -eq 1 ]; then
+ parttreeoutopt="-t"
+ treeoutopt="-t"
+ else
+ parttreeoutopt=" "
+ treeoutopt=" "
+ fi
+ if [ $distout -eq 1 ]; then
+ distoutopt="-y"
+ fi
+ fi
+#
+
+# Count lines in the input whose '>' is preceded by blanks; if any exist,
+# report them with their line numbers on stderr and exit 1.
+ formatcheck=`grep -c '^[[:blank:]]\+>' $TMPFILE/infile | head -1 `
+ if [ $formatcheck -gt 0 ]; then
+ echo "The first character of a description line must be " 1>&2
+ echo "the greater-than (>) symbol, not a blank." 1>&2
+ echo "Please check the format around the following line(s):" 1>&2
+ grep -n '^[[:blank:]]\+>' $TMPFILE/infile 1>&2
+ exit 1
+ fi
+
+# nseq = number of input lines starting with '>', '|' or '='.
+# Two sequences force cycle=1; cycle is otherwise capped at 3.
+ nseq=`grep -c '^[>|=]' $TMPFILE/infile | head -1 `
+ if [ $nseq -eq 2 ]; then
+ cycle=1
+ fi
+ if [ $cycle -gt 3 ]; then
+ cycle=3
+ fi
+
+# Refuse iterative refinement (iterate > 1) for more than 60000 sequences.
+ if [ $nseq -gt 60000 -a $iterate -gt 1 ]; then # 2014/Oct/22, test
+ echo "Too many sequences to perform iterative refinement!" 1>&2
+ echo "Please use a progressive method." 1>&2
+ exit 1
+ fi
+# The "lastmulti" / "multi" distances are only accepted when fragment is non-zero.
+ if [ $distance = "lastmulti" -o $distance = "multi" ]; then
+ if [ $fragment -eq 0 ]; then
+ echo 'Specify --addfragments too' 1>&2
+ exit 1
+ fi
+ fi
+
+# With fragment set: default the distance to "multi" when pairspecified is 0,
+# then exit 1 unless the distance is one of the values tested below.
+ if [ $fragment -ne 0 ]; then
+ if [ $pairspecified -eq 0 ]; then
+ distance="multi"
+ fi
+ if [ $distance != "multi" -a $distance != "hybrid" -a $distance != "lastmulti" -a $distance != "local" -a $distance != "last" -a $distance != "ktuples" -a $distance != "ktuplesmulti" ]; then
+ echo 'Specify --multipair, --lastmultipair, --lastpair, --localpair, --6merpair, --6mermultipair or --hybridpair' 1>&2
+ exit 1
+ fi
+ fi
+
+# memopt " -M -B " is accepted only together with the "ktuples" distance.
+ if [ "$memopt" = " -M -B " -a "$distance" != "ktuples" ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+
+# parttree mode: exit 1 when combined with merge, add(fragments), seeds or
+# iterate > 1; force outorder to "input"; choose splitopt from partdist.
+ if [ $distance = "parttree" ]; then
+ if [ $mergetable != "/dev/null" ]; then
+ echo "The combination of (dp)parttree and merge is Impossible. " 1>&2
+ exit 1
+ fi
+ if [ $addfile != "/dev/null" ]; then
+ echo "The combination of (dp)parttree and add(fragments) is Impossible. " 1>&2
+ exit 1
+ fi
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ if [ $iterate -gt 1 ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+# NOTE(review): this conditional is redundant; outorder is set unconditionally right after it.
+ if [ $outorder = "aligned" ]; then
+ outorder="input"
+ fi
+ outorder="input" # partorder takes effect
+ if [ $partdist = "localalign" ]; then
+ splitopt=" -U " # -U -l -> fast
+ cycle=1
+ elif [ $partdist = "fasta" ]; then
+ splitopt=" -S "
+ cycle=1
+ else
+ splitopt=" "
+ fi
+ fi
+
+
+# Choose localparam (and adjust cycle / iterate / weighti) from the distance mode:
+#  - ktuples(multi) without seeds, structure lists or enrichstr: empty localparam
+#  - ktuples(multi) with any of those: at least 2 cycles, at least 2 iterations
+#  - parttree: empty localparam; cycle=1 when groupsize > -1
+#  - anything else: "-B -l $weighti", with cycle capped at 1
+ if [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed = "x" -a $seedtable = "x" -a $ownlist = "/dev/null" -a $pdblist = "/dev/null" -a $enrichstr -eq 0 \) ]; then
+ localparam=""
+ weighti="0.0"
+ elif [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed != "x" -o $seedtable != "x" -o $ownlist != "/dev/null" -o $pdblist != "/dev/null" -o $enrichstr -eq 1 \) ]; then
+ if [ $cycle -lt 2 ]; then
+ cycle=2 # disttbfast does not support seed # caution 2014Aug21
+ fi
+ if [ $iterate -lt 2 ]; then
+ echo "############################################################################" 1>&2
+ echo "# Warning:" 1>&2
+ echo "# Progressive alignment method is incompatible with the --seed option." 1>&2
+ echo "# Automatically switched to the iterative refinement method." 1>&2
+ echo "# " 1>&2
+ echo "# Also consider using the '--add' option, which is compatible with" 1>&2
+ echo "# the progressive method and FASTER than the '--seed' option." 1>&2
+ echo "# Usage is:" 1>&2
+ echo "# % mafft --add newSequences existingAlignment > output" 1>&2
+ echo "############################################################################" 1>&2
+ iterate=2
+ fi
+ localparam="-l "$weighti
+ elif [ $distance = "parttree" ]; then
+ localparam=""
+ weighti="0.0"
+ if [ $groupsize -gt -1 ]; then
+ cycle=1
+ fi
+ else
+ localparam="-B -l "$weighti # no splitting even when weighti=0
+ if [ $cycle -gt 1 ]; then # 09/01/08
+ cycle=1
+ fi
+ fi
+
+
+# genaf distances: aof is reset to "0.000"; unless oldgenafparam is 1, laof and
+# lexp are zeroed as well and usenaivepairscore becomes "-Z".
+ if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
+ aof="0.000"
+ if [ $oldgenafparam -ne 1 ]; then
+ laof="0.0"
+ lexp="0.0"
+# LEXP="0.0" # default = 0.0
+ usenaivepairscore="-Z"
+ fi
+ fi
+
+
+# if [ $nseq -gt 5000 ]; then
+# fft=0
+# fi
+# param_fft: " -G " (and fft=1) when forcefft is 1, " -F " when fft is 1, blank otherwise.
+ if [ $forcefft -eq 1 ]; then
+ param_fft=" -G "
+ fft=1
+ elif [ $fft -eq 1 ]; then
+ param_fft=" -F "
+ else
+ param_fft=" "
+ fi
+
+# seed and seedtable are mutually exclusive ("x" means not given).
+ if [ $seed != "x" -a $seedtable != "x" ]; then
+ echo 'Use either one of seedtable and seed. Not both.' 1>&2
+ exit 1
+ fi
+# f2clext "-E" (the message calls this --text) cannot be combined with anysymbol > 0.
+ if [ $f2clext = "-E" -a $anysymbol -gt 0 ]; then
+ echo '' 1>&2
+ echo 'At present, the combination of --text and ( --anysymbol or --preservecase ) is impossible.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
+
+# f2clext "-E" cannot be combined with a user matrix (aamatrix other than /dev/null).
+ if [ $f2clext = "-E" -a $aamatrix != "/dev/null" ]; then
+ echo '' 1>&2
+ echo 'At present, the combination of --text and (--aamatrix) is impossible.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
+
+# treein=1 with a missing or empty _guidetree: write a keyword line chosen by
+# pileuporshuffle into $TMPFILE/_guidetree instead. Only the "ktuples"
+# distance is accepted on this path.
+ if [ $treein -eq 1 ]; then
+# if [ $iterate -gt 0 ]; then
+# echo 'Not supported yet.' 1>&2
+# exit 1
+# fi
+ if [ ! -s $TMPFILE/_guidetree ]; then
+ if [ $distance != "ktuples" ]; then
+ echo "Not supported yet" 1>>"$progressfile"
+ exit 1
+ fi
+ if [ $pileuporshuffle = "p" ]; then
+ echo "pileup" > $TMPFILE/_guidetree
+# weightopt=" -u " -> disttbfast.c?
+# numthreadstb=0 -> disttbfast.c
+ cycle=1 # specified for disttbfast
+ elif [ $pileuporshuffle = "s" ]; then
+ echo "shuffle $randomseed" > $TMPFILE/_guidetree
+# numthreadstb=0 -> disttbfast.c
+# weightopt=" -u " -> disttbfast.c?
+ cycle=1 # also specified in disttbfast.c
+ elif [ $pileuporshuffle = "C" ]; then
+ echo "very compact" > $TMPFILE/_guidetree
+ elif [ $pileuporshuffle = "c" ]; then
+ echo "compact " "$initialramusage" > $TMPFILE/_guidetree
+ elif [ $pileuporshuffle = "a" ]; then
+ echo "auto $randomseed 200" > $TMPFILE/_guidetree
+ fi
+ fi
+ fi
+
+# nadd > 0: build addarg / addsinglearg according to the fragment value
+# (1, -1, -2, or anything else) and switch iterative refinement off.
+ if [ $nadd -gt "0" ]; then
+ if [ $fragment -eq "1" ]; then
+ addarg="$addarg0 $nadd -g -0.01"
+ addsinglearg=""
+ cycle=1 # caution 2014Aug25
+ elif [ $fragment -eq "-1" ]; then
+ addarg="$addarg0 $nadd"
+ addsinglearg="-V" # allowlongadds, 2014/04/02
+ cycle=1 # caution 2014Aug25
+ elif [ $fragment -eq "-2" ]; then
+ addarg="$addarg0 $nadd"
+ addsinglearg="-V" # allowlongadds + smoothing
+ add2ndhalfarg=$add2ndhalfarg" -p "
+ cycle=1 # caution 2014Aug25
+ usenaivepairscore="-Z" # 2015Jun01
+ laof=0.0 # 2015Jun01
+ lexp=0.0 # 2015Jun01
+ else
+ addarg="$addarg0 $nadd"
+ addsinglearg=""
+ fi
+
+# cycle=1 # caution 2014Aug19
+ iterate=0
+# treealg=" -q " ## 2012/01/24 ## removed 2012/02/06
+ fi
+
+
+# Split the cycle count between tbfast and disttbfast. With an empty
+# localparam, no fragment mode and a non-parttree distance the cycles go to
+# disttbfast; otherwise they go to tbfast.
+ if [ -z "$localparam" -a $fragment -eq 0 -a $distance != "parttree" ]; then
+# echo "use disttbfast"
+# echo cycle = $cycle
+ cycletbfast=1 # tbfast is not run
+ cycledisttbfast=$cycle # pass -E cycle to disttbfast
+ if [ $cycledisttbfast -eq 0 ]; then # used by --treeout
+ cycledisttbfast=1
+ fi
+ else
+# echo "use tbfast"
+# echo cycle = $cycle
+ cycletbfast=$cycle # run if 1 or more
+ cycledisttbfast=1 # disttbfast runs only once
+ fi
+
+# echo localparam=
+# echo $localparam
+# echo cycletbfast=
+# echo $cycletbfast
+# echo cycledisttbfast=
+# echo $cycledisttbfast
+
+#exit
+
+# adjustdirection > 0 cannot be combined with a seed alignment.
+ if [ $adjustdirection -gt 0 -a $seed != "x" ]; then
+ echo '' 1>&2
+ echo 'The combination of --adjustdirection(accurately) and --seed is not supported.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
+
+
+# Any RNA structure option (mccaskill, dafs, rnaalifold, contrafold) is
+# rejected with the "ktuples" distance and with f2clext "-E" (text mode).
+ if [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
+ if [ $distance = "ktuples" ]; then
+ echo 'Not supported.' 1>&2
+ echo 'Please add --globalpair, --localpair, --scarnapair, --dafspair' 1>&2
+ echo '--larapair, --slarapair, --foldalignlocalpair or --foldalignglobalpair' 1>&2
+ exit 1
+ fi
+ if [ $f2clext = "-E" ]; then
+ echo '' 1>&2
+ echo 'For RNA alignment, the --text mode is impossible.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
+ fi
+
+# cycle is modified later on, so the strategy name is decided here.
+# from here
+#
+# Pick the strategy-name prefix from the pileup/shuffle mode, the RNA
+# structure options and the pairwise distance mode.
+#
+# Fix: the "Q-" test used to end with a bare `-o "globalgenaf"`. A lone
+# non-empty string is always true for test(1), so the branch matched every
+# distance. It now compares $distance like its sibling terms; for any other
+# distance the RNA branch leaves strategy as it was.
+ if [ $pileuporshuffle = "p" ]; then
+   strategy="Pileup-"
+ elif [ $pileuporshuffle = "s" ]; then
+   strategy="Randomchain-"
+ elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
+   if [ $distance = "scarna" -o $distance = "dafs" -o $distance = "lara" -o $distance = "slara" -o $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then
+     strategy="X-"
+   elif [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
+     strategy="Q-"
+   fi
+ elif [ $distance = "fasta" -a $sw -eq 0 ]; then
+   strategy="F-"
+ elif [ $distance = "fasta" -a $sw -eq 1 ]; then
+   strategy="H-"
+ elif [ $distance = "blast" ]; then
+   strategy="B-"
+ elif [ $distance = "global" -o $distance = "distonly" ]; then
+   strategy="G-"
+ elif [ $distance = "local" ]; then
+   strategy="L-"
+ elif [ $distance = "last" ]; then
+   strategy="Last-"
+ elif [ $distance = "hybrid" ]; then
+   strategy="Hybrid-"
+ elif [ $distance = "multi" ]; then
+   strategy="Multi-"
+ elif [ $distance = "lastmulti" ]; then
+   strategy="LastMulti-"
+ elif [ $distance = "localgenaf" ]; then
+   strategy="E-"
+ elif [ $distance = "globalgenaf" ]; then
+   strategy="K-"
+ elif [ $fft -eq 1 ]; then
+   strategy="FFT-"
+ else
+   strategy="NW-"
+ fi
+# if [ `echo "$weighti>0.0" | bc` -gt 0 ]; then
+# Complete the strategy name: append "I" when weighti > 0, then "NS-", then
+# a suffix: "i" for iterative refinement, a PartTree variant plus cycle, a
+# fragment-mode word, or the bare cycle count.
+ if [ `awk "BEGIN {print(0.0+\"$weighti\">0.0)}"` -gt 0 ]; then
+ strategy=$strategy"I"
+ fi
+ strategy=$strategy"NS-"
+ if [ $iterate -gt 0 ]; then
+ strategy=$strategy"i"
+ elif [ $distance = "parttree" ]; then
+ if [ $partdist = "fasta" ]; then
+ strategy=$strategy"FastaPartTree-"$cycle
+ elif [ $partdist = "localalign" ]; then
+ strategy=$strategy"DPPartTree-"$cycle
+ else
+ strategy=$strategy"PartTree-"$cycle
+ fi
+ elif [ $fragment -eq 1 ]; then
+ strategy=$strategy"fragment"
+ elif [ $fragment -eq -1 ]; then
+ strategy=$strategy"full"
+ elif [ $fragment -eq -2 ]; then
+ strategy=$strategy"long"
+ else
+ strategy=$strategy$cycle
+ fi
+
+# Human-readable description of the chosen strategy. Strategies without an
+# entry below keep the '?' / 'Not tested.' defaults.
+ explanation='?'
+ performance='Not tested.'
+ case "$strategy" in
+   "F-INS-i")
+     explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment information'
+     performance='Most accurate, but very slow'
+     ;;
+   "L-INS-i")
+     explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment information'
+     performance='Probably most accurate, very slow'
+     ;;
+   "E-INS-i")
+     explanation='Iterative refinement method (<'$iterate') with LOCAL pairwise alignment with generalized affine gap costs (Altschul 1998)'
+     performance='Suitable for sequences with long unalignable regions, very slow'
+     ;;
+   "G-INS-i")
+     explanation='Iterative refinement method (<'$iterate') with GLOBAL pairwise alignment information'
+     performance='Suitable for sequences of similar lengths, very slow'
+     ;;
+   "X-INS-i")
+     explanation='RNA secondary structure information is taken into account.'
+     performance='For short RNA sequences only, extremely slow'
+     ;;
+   "F-INS-1"|"L-INS-1")
+     explanation='Progressive method incorporating LOCAL pairwise alignment information'
+     ;;
+   "G-INS-1")
+     explanation='Progressive method incorporating GLOBAL pairwise alignment information'
+     ;;
+   "FFT-NS-i"|"NW-NS-i")
+     explanation='Iterative refinement method (max. '$iterate' iterations)'
+     performance='Standard'
+     if [ $iterate -gt 2 ]; then
+       performance='Accurate but slow'
+     fi
+     ;;
+   "FFT-NS-2"|"NW-NS-2")
+     explanation='Progressive method (guide trees were built '$cycle' times.)'
+     performance='Fast but rough'
+     ;;
+   "FFT-NS-1"|"NW-NS-1")
+     explanation='Progressive method (rough guide tree was used.)'
+     performance='Very fast but very rough'
+     ;;
+ esac
+
+# Map the (output format, output order) pair to the option string kept in
+# outputopt; every pair not listed yields the literal "null".
+ case "$outputformat:$outorder" in
+   clustal:aligned) outputopt=" -c $strategy -r $TMPFILE/order $f2clext " ;;
+   clustal:input)   outputopt=" -c $strategy $f2clext " ;;
+   phylip:aligned)  outputopt=" -y -r $TMPFILE/order " ;;
+   phylip:input)    outputopt=" -y " ;;
+   pir:aligned)     outputopt=" -f -r $TMPFILE/order " ;;
+   *)               outputopt="null" ;;
+ esac
+# up to here
+
+
+
+# ( # 2017/Mar/17
+# Work inside the temporary directory from here on and create/truncate the file "pre".
+ pushd $TMPFILE > /dev/null;
+
+ cat /dev/null > pre
+
+# echo "nseq = " $nseq 1>>"$progressfile"
+# echo "distance = " $distance 1>>"$progressfile"
+# echo "iterate = " $iterate 1>>"$progressfile"
+# echo "cycle = " $cycle 1>>"$progressfile"
+
+# anysymbol=1: keep the raw input as "orig" and regenerate infile through replaceu.
+ if [ $anysymbol -eq 1 ]; then
+ mv infile orig
+ "$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1
+ fi
+
+# With a merge table: reject the combination with added sequences (nadd > 0)
+# and set mergearg to "-H $seedoffset".
+ if [ $mergetable != "/dev/null" ]; then
+ if [ $nadd -gt "0" ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+# if [ $seed != "x" -o $seedtable != "x" ]; then
+# echo "This version does not support the combination of merge and seed." 1>&2
+# exit 1
+# fi
+# iterate=0 # 2013/04/16
+ mergearg="-H $seedoffset"
+ fi
+
+# adjustdirection > 0: run makedirectionlist to write _direction, then rewrite
+# infile (and orig when anysymbol=1) with setdirection.
+# NOTE(review): both branches of the fragment test set the same fragarg, and the
+# exit status of makedirectionlist is not checked -- confirm both are intentional.
+ if [ $adjustdirection -gt 0 ]; then
+ if [ $fragment -ne 0 ]; then
+ fragarg="-F" #
+ else
+ fragarg="-F" # 2014/02/06, do not consider other additional sequences, even in the case of --add
+ fi
+ if [ $adjustdirection -eq 1 ]; then
+ "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 5000 -o a > _direction 2>>"$progressfile"
+ elif [ $adjustdirection -eq 2 ]; then
+ "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 100 -o a -d > _direction 2>>"$progressfile"
+ fi
+ "$prefix/setdirection" $mergearg -d _direction -i infile > infiled 2>>"$progressfile" || exit
+ mv infiled infile
+ if [ $anysymbol -eq 1 ]; then
+ "$prefix/setdirection" $mergearg -d _direction -i orig -r > origd 2>>"$progressfile" || exit
+ mv origd orig
+ fi
+ fi
+
+# A seed or seedtable cannot be combined with structure lists or with enrich.
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ echo "The combination of --seed and (--pdbidlist or --pdbfilelist) is impossible." 1>>"$progressfile"
+ exit 1
+ fi
+ if [ $enrich -eq 1 ]; then
+ echo "The combination of --seed and (--enrich, --enrichseq or --enrichstr) is impossible at present." 1>>"$progressfile"
+ exit 1
+ fi
+ fi
+
+# enrich=1: call seekquencer_premafft.pl to collect homologs, then append
+# the sequences it found to infile (enrichseq) and/or the structures it found
+# to pdblist (enrichstr). The script's stderr is collected in "seekerr" and
+# copied to the progress file; a non-zero exit status aborts with 1.
+#
+# Fixes: the warning used echo with a trailing "\n", which bash's builtin
+# echo prints literally; it now goes through printf like the other messages
+# in this script. The "homoplogs" typo in the progress message is corrected.
+ if [ $enrich -eq 1 ]; then
+   if [ $ownlist != "/dev/null" ]; then
+     printf "%s\n\n" "Warning: Sequence homologs of the structures given with the --pdbfilelist option cannot be collected." 1>>"$progressfile"
+   fi
+   echo "SEEKQUENCER (http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/) is" 1>>"$progressfile"
+   if [ $pdblist != "/dev/null" ]; then
+     echo "collecting homologs of the input sequences and the structures given with the --pdbidlist option." 1>>"$progressfile"
+     perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -idf pdblist -out seekout -mod mafftash-split 2>>"seekerr"
+     seekres="$?"
+   else
+     echo "collecting homologs of the input sequences." 1>>"$progressfile"
+     perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -out seekout -mod mafftash-split 2>>"seekerr"
+     seekres="$?"
+   fi
+   cat seekerr 1>>"$progressfile"
+
+   if [ $seekres -ne "0" ]; then
+     echo "Error in SEEKQUENCER" 1>>"$progressfile"
+     exit 1;
+   fi
+   echo "Done." 1>>"$progressfile"
+
+   if [ $enrichseq -eq 1 ]; then
+# cat seekout.seq >> infile
+     if [ $anysymbol -eq 1 ]; then
+       "$prefix/replaceu" $seqtype -i seekout.seq -o $nseq >> infile
+       cat seekout.seq >> orig
+     else
+       "$prefix/replaceu" $seqtype -i seekout.seq | sed 's/_os_[0-9]*_oe_//' >> infile
+     fi
+
+   fi
+   if [ $enrichstr -eq 1 ]; then
+     nseekstr=`wc -l < seekout.str`
+     if [ $nseekstr -gt 1 ]; then
+       cat seekout.str >> pdblist
+# Any value other than "/dev/null" marks pdblist as in use
+# (romaji "tsukaimasu" = "will use").
+       pdblist="tsukaimasu"
+     fi
+   fi
+ fi
+
+# Assemble infile and hat3.seed from one of three sources:
+#  - seed alignments (seed != "x"): the files seedN are converted with multi2hat3s
+#  - a seed table: _seedtablefile is copied to hat3.seed
+#  - structure lists (pdblist / ownlist): mafftash_premafft.pl (DASH) is called
+# With none of them, hat3.seed is created empty.
+ if [ $seed != "x" ]; then
+ mv infile infile2
+ if [ $anysymbol -eq 1 ]; then
+ mv orig orig2
+ cat /dev/null > orig
+ fi
+ cat /dev/null > infile
+ cat /dev/null > hat3.seed
+ seedoffset=0
+# echo "seednseq="$seednseq
+# echo "seedoffset="$seedoffset
+# Load the words of $seednseq into the positional parameters; the loop below
+# shifts once per pass, reads the file seed$# and adds $1 to seedoffset.
+ set $seednseq >> "$progressfile"
+# echo $#
+ while [ $# -gt 1 ]
+ do
+ shift
+# echo "num="$#
+
+ if [ $anysymbol -eq 1 ]; then
+ cat seed$# >> orig
+ "$prefix/replaceu" $seqtype -i seed$# -o $seedoffset > clean 2>>"$progressfile" || exit 1
+ mv clean seed$#
+ fi
+ "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile 2>>"$progressfile" || exit 1
+ cat hat3 >> hat3.seed
+# echo "$1"
+ seedoffset=`expr $seedoffset + $1`
+# echo "$1"
+# echo "seedoffset="$seedoffset
+ done;
+# echo "seedoffset="$seedoffset
+# Append the original (non-seed) input after the seed sequences.
+ if [ $anysymbol -eq 1 ]; then
+ "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # redo
+ cat orig2 >> orig
+ else
+ cat infile2 >> infile
+ fi
+ elif [ $seedtable != "x" ]; then
+ cat _seedtablefile > hat3.seed
+ elif [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ mv infile infile2
+ if [ $anysymbol -eq 1 ]; then
+ mv orig orig2
+ cat /dev/null > orig
+ fi
+ cat /dev/null > infile
+
+ echo "strdir = " 1>>"$progressfile"
+ echo $strdir 1>>"$progressfile"
+
+# The DASH helper's stderr is collected in "dasherr" and copied to the
+# progress file; a non-zero exit status aborts with 1.
+ echo "Calling DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/)" 1>>"$progressfile"
+ perl "$prefix/mafftash_premafft.pl" -p pdblist -o ownlist -d "$strdir" 2>>"dasherr"
+ dashres="$?"
+ cat dasherr 1>>"$progressfile"
+
+ if [ $dashres -ne "0" ]; then
+ echo "Error in DASH" 1>>"$progressfile"
+ exit 1;
+ fi
+ echo "Done." 1>>"$progressfile"
+
+# seedoffset = number of entries in the file "instr".
+ seedoffset=`grep -c '^[>|=]' instr | head -1 `
+
+ echo "# of structures = " 1>>"$progressfile"
+ echo $seedoffset 1>>"$progressfile"
+ mv hat3 hat3.seed
+
+# The entries of "instr" go first, followed by the original input.
+ if [ $anysymbol -eq 1 ]; then
+ cat instr >> orig
+ "$prefix/replaceu" $seqtype -i instr -o 0 > clean 2>>"$progressfile" || exit 1
+ mv clean infile
+
+ "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # redo
+ cat orig2 >> orig
+ else
+ cat instr > infile
+ cat infile2 >> infile
+ fi
+ else
+ cat /dev/null > hat3.seed
+ fi
+# cat hat3.seed
+
+
+
+
+# Run the wrapper that matches the selected RNA structure option; its stdout
+# is saved as hat4, and a failure exits with 1.
+ if [ $mccaskill -eq 1 ]; then
+ "$prefix/mccaskillwrap" -s -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ elif [ $dafs -eq 1 ]; then
+ "$prefix/mccaskillwrap" -G -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ elif [ $contrafold -eq 1 ]; then
+ "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ fi
+# Main alignment dispatch on $distance.
+# Most branches share one pattern: run a pairwise stage on infile, put the
+# contents of hat3.seed in front of hat3 (through the temporary file hatx),
+# then run the multiple-alignment program on infile.
+# Branches that call tbfast with "_ ... _" give it a second, underscore-
+# delimited option group on the same command line; the two-step form they
+# replaced is kept right above them as commented-out code.
+# NOTE(review): presumably tbfast forwards the "_ ... _" group to its built-in
+# pairwise stage -- confirm against the tbfast sources.
+# Every external command aborts the script with status 1 on failure, and all
+# diagnostics are appended to $progressfile.
+ if [ $distance = "fasta" ]; then
+ "$prefix/dndfast7" $swopt < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "blast" ]; then
+ "$prefix/dndblast" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "foldalignlocal" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -Q $spfactor -h $laof -H -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "foldalignglobal" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "slara" ]; then
+ "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -T -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "lara" ]; then
+ "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -B -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "scarna" ]; then
+# Former two-step form (pairlocalalign, then tbfast), kept for reference:
+# "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "dafs" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -G -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "global" ]; then
+# Former two-step form (pairlocalalign, then tbfast), kept for reference:
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+
+ elif [ $distance = "local" ]; then
+# With a non-zero $fragment the final step is addsingle instead of tbfast.
+ if [ $fragment -ne 0 ]; then
+ "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# addarg is not passed (to the pairwise stage)
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+ elif [ $distance = "globalgenaf" ]; then
+ "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -O $GGOP -E $GEXP -K $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "localgenaf" ]; then
+# Former two-step form (pairlocalalign, then tbfast), kept for reference:
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "last" ]; then
+# With a non-zero $fragment the final step is addsingle instead of tbfast.
+ if [ $fragment -ne 0 ]; then
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+# addarg is not passed (to the pairwise stage above)
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+ elif [ $distance = "lastmulti" ]; then
+# dndpre and pairlocalalign each leave a hat2 file, saved as hat2i and hat2n.
+# Only the $fragment != 0 case is supported; otherwise the script gives up.
+ "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1
+ mv hat2 hat2i
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -r $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hat2 hat2n
+ mv hatx hat3
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ elif [ $distance = "multi" ]; then
+# Same shape as the lastmulti branch, with a different pairlocalalign mode (-Y).
+ "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ mv hat2 hat2i
+ "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hat2 hat2n
+ mv hatx hat3
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ elif [ $distance = "hybrid" ]; then
+# pairlocalalign, then disttbfast, then either addsingle or tbfast.
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+# Disabled "distonly" branch, kept for reference:
+# elif [ $distance = "distonly" ]; then
+# "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -t < infile > /dev/null 2>>"$progressfile" || exit 1
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "parttree" ]; then
+# splittbfast writes its result to pre; hat3.seed simply becomes hat3.
+ "$prefix/splittbfast" $legacygapopt $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1
+ mv hat3.seed hat3
+ elif [ $distance = "ktuplesmulti" ]; then
+# "$prefix/dndpre" $model -M 1 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1
+# mv hat2 hat2i
+# "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# mv hat2 hat2n
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ else
+# Any other $distance value: addsingle when $fragment is non-zero, otherwise
+# disttbfast, whose standard output becomes pre.
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ "$prefix/disttbfast" -q $npickup -E $cycledisttbfast -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > pre 2>>"$progressfile" || exit 1
+ mv hat3.seed hat3
+ fi
+ fi
+# Extra tree-based passes: repeat while the cycletbfast counter is above 1,
+# decrementing it once per pass. The parttree case feeds the previous result
+# (pre) back in as infile; all other cases run tbfast with pre on standard input.
+ while [ $cycletbfast -gt 1 ]
+ do
+ if [ $distance = "parttree" ]; then
+ mv pre infile
+ "$prefix/splittbfast" $legacygapopt -Z $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1
+ else
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum -C $numthreadstb $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1
+# When fragment > 0, nothing is done here.
+# Take care with seeds!!
+ fi
+ cycletbfast=`expr $cycletbfast - 1`
+ done
+# Iterative stage, run only when $iterate is positive: dvtditr reads pre on
+# standard input with -I $iterate. For the "ktuples" distance, dndpre is run
+# on pre first. Failures abort the script with status 1.
+ if [ $iterate -gt 0 ]; then
+ if [ $distance = "ktuples" ]; then
+ "$prefix/dndpre" $model -M 2 -C $numthreads < pre > /dev/null 2>>"$progressfile" || exit 1
+ fi
+ "$prefix/dvtditr" -W $minimumweight $bunkatsuopt -E $fixthreshold -s $unalignlevel $legacygapopt $mergearg $outnum -C $numthreadsit -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -Q $spfactor -h $aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1
+ fi
+# Post-process the alignment in pre, replacing it in place through a
+# temporary file. The two cases are mutually exclusive: with coreout set,
+# setcore is applied and restoreu is NOT run even if anysymbol is also set.
+# Note that the setcore call has no "|| exit 1" guard, unlike restoreu.
+ if [ $coreout -eq 1 ]; then
+ "$prefix/setcore" -w $corewin -i $corethr $coreext < pre > pre2
+ mv pre2 pre
+ elif [ $anysymbol -eq 1 ]; then
+ "$prefix/restoreu" $add2ndhalfarg -a pre -i orig > restored || exit 1
+ mv restored pre
+ fi
+
+
+
+
+ echo '' 1>>"$progressfile"
+ if [ $mccaskill -eq 1 ]; then
+ echo "RNA base pairing probaility was calculated by the McCaskill algorithm (1)" 1>>"$progressfile"
+ echo "implemented in Vienna RNA package (2) and MXSCARNA (3), and then" 1>>"$progressfile"
+ echo "incorporated in the iterative alignment process (4)." 1>>"$progressfile"
+ echo "(1) McCaskill, 1990, Biopolymers 29:1105-1119" 1>>"$progressfile"
+ echo "(2) Hofacker et al., 2002, J. Mol. Biol. 319:3724-3732" 1>>"$progressfile"
+ echo "(3) Tabei et al., 2008, BMC Bioinformatics 9:33" 1>>"$progressfile"
+ echo "(4) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ elif [ $contrafold -eq 1 ]; then
+ echo "RNA base pairing probaility was calculated by the CONTRAfold algorithm (1)" 1>>"$progressfile"
+ echo "and then incorporated in the iterative alignment process (4)." 1>>"$progressfile"
+ echo "(1) Do et al., 2006, Bioinformatics 22:e90-98" 1>>"$progressfile"
+ echo "(2) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ fi
+# Append method citations and a summary of the chosen strategy to
+# $progressfile. Each "if" below is independent and adds the note that
+# matches one setting ($pdblist/$ownlist, $distance, $partdist).
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ echo "Input structures are decomposed into structural domains using" 1>>"$progressfile"
+ echo "Protein Domain Parser (Alexandrov & Shindyalov 2003)." 1>>"$progressfile"
+ echo "Domain pairs are aligned using the rash function in" 1>>"$progressfile"
+ echo "the ASH structural alignment package (Standley et al. 2007)." 1>>"$progressfile"
+ fi
+ if [ $pdblist != "/dev/null" ]; then
+ echo "Pre-computed alignments stored in " 1>>"$progressfile"
+ echo "DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/) are used. " 1>>"$progressfile"
+ fi
+ if [ $distance = "fasta" -o $partdist = "fasta" ]; then
+ echo "Pairwise alignments were computed by FASTA" 1>>"$progressfile"
+ echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" 1>>"$progressfile"
+ fi
+ if [ $distance = "blast" ]; then
+ echo "Pairwise alignments were computed by BLAST" 1>>"$progressfile"
+ echo "(Altschul et al., 1997, NAR 25:3389-3402)" 1>>"$progressfile"
+ fi
+ if [ $distance = "last" -o $distance = "lastmulti" ]; then
+ echo "Pairwise alignments were computed by LAST" 1>>"$progressfile"
+ echo "http://last.cbrc.jp/" 1>>"$progressfile"
+ echo "Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487" 1>>"$progressfile"
+ fi
+ if [ $distance = "scarna" ]; then
+ echo "Pairwise alignments were computed by MXSCARNA" 1>>"$progressfile"
+ echo "(Tabei et al., 2008, BMC Bioinformatics 9:33)." 1>>"$progressfile"
+ fi
+ if [ $distance = "dafs" ]; then
+ echo "Pairwise alignments were computed by DAFS" 1>>"$progressfile"
+# NOTE(review): the citation below is an unfinished placeholder (",,,,");
+# fill in journal, volume and pages once confirmed.
+ echo "(Sato et al., 2012,,,,)." 1>>"$progressfile"
+ fi
+ if [ $distance = "lara" -o $distance = "slara" ]; then
+ echo "Pairwise alignments were computed by LaRA" 1>>"$progressfile"
+ echo "(Bauer et al., 2007, BMC Bioinformatics 8:271)." 1>>"$progressfile"
+ fi
+ if [ $distance = "foldalignlocal" ]; then
+ echo "Pairwise alignments were computed by FOLDALIGN (local)" 1>>"$progressfile"
+ echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile"
+ fi
+ if [ $distance = "foldalignglobal" ]; then
+ echo "Pairwise alignments were computed by FOLDALIGN (global)" 1>>"$progressfile"
+ echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile"
+ fi
+# printf "\n" 1>>"$progressfile"
+# Strategy summary: name, performance note and explanation.
+# $strategy is used as the printf format string itself, so it is printed
+# without a trailing newline and continues on the same line as $performance.
+ echo 'Strategy:' 1>>"$progressfile"
+ printf ' '$strategy 1>>"$progressfile"
+ echo ' ('$performance')' 1>>"$progressfile"
+ echo ' '$explanation 1>>"$progressfile"
+ echo '' 1>>"$progressfile"
+ echo "If unsure which option to use, try 'mafft --auto input > output'." 1>>"$progressfile"
+ echo "For more information, see 'mafft --help', 'mafft --man' and the mafft page." 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ echo "The default gap scoring scheme has been changed in version 7.110 (2013 Oct)." 1>>"$progressfile"
+ echo "It tends to insert more gaps into gap-rich regions than previous versions." 1>>"$progressfile"
+ echo "To disable this change, add the --leavegappyregion option." 1>>"$progressfile"
+# echo "If long gaps are expected, try 'mafft --ep 0.0 --auto input > output'." 1>>"$progressfile"
+# echo "If the possibility of long gaps can be excluded, add '--ep 0.123'." 1>>"$progressfile"
+# Extra note for the two genaf distances, depending on which parameter set
+# ($oldgenafparam) was used.
+ if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
+ echo "" 1>>"$progressfile"
+ if [ $oldgenafparam -eq 1 ]; then
+ echo "Obsolete parameters used for this calculation." 1>>"$progressfile"
+ echo "Also try the new parameters for E-INS-i, by not specifying --oldgenafpair." 1>>"$progressfile"
+ else
+ echo "Parameters for the E-INS-i option have been changed in version 7.243 (2015 Jun)." 1>>"$progressfile"
+ echo "To switch to the old parameters, use --oldgenafpair, instead of --genafpair." 1>>"$progressfile"
+ fi
+ fi
+ echo '' 1>>"$progressfile"
+
+
+# Repeat the DASH log (dasherr) at the very end of the progress output.
+# NOTE(review): dasherr is also appended to $progressfile right after the DASH
+# call earlier in this script, so its content shows up twice -- confirm that
+# the repetition is intended.
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ cat dasherr >>"$progressfile"
+ fi
+
+# ) # 2017/Mar/17
+ popd > /dev/null;
+
+
+ if [ "$outputfile" = "" ]; then
+ if [ "$outputopt" = "null" ]; then
+ cat < $TMPFILE/pre || exit 1
+ else
+ "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre 2>>/dev/null || exit 1
+ fi
+ else
+ if [ "$outputopt" = "null" ]; then
+ cat < $TMPFILE/pre > "$outputfile" || exit 1
+ else
+ "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre > "$outputfile" 2>>/dev/null || exit 1
+ fi
+ fi
+
+ if [ $treeout -eq 1 ]; then
+ cp $TMPFILE/infile.tree "$infilename.tree"
+ fi
+
+ if [ -s $TMPFILE/GuideTree ]; then # --merge no toki dake
+ cp $TMPFILE/GuideTree .
+ fi
+
+ if [ $distout -eq 1 ]; then
+ cp $TMPFILE/hat2 "$infilename.hat2"
+ fi
+
+ if [ $npickup -ne 0 ]; then
+ cp $TMPFILE/notused "$infilename.notused"
+ fi
+
+ if [ -s $TMPFILE/_deletemap ]; then
+ if [ "$mapoutfile" = "/dev/null" ]; then
+ cp $TMPFILE/_deletemap "$addfile.map"
+ else
+ cp $TMPFILE/_deletemap "$mapoutfile"
+ fi
+ fi
+
+ exit 0;
+fi
+
+# Choose the awk interpreter for the interactive front end below.
+# Plain "awk" is the fallback. nawk and then gawk are looked up with which;
+# each one found as an executable replaces the current choice, so gawk has
+# the highest priority, followed by nawk.
+prog="awk"
+
+for awkname in nawk gawk
+do
+ tmpawk=$(which $awkname 2>/dev/null | awk '{print $1}')
+ if [ -x "$tmpawk" ]; then
+ prog="$tmpawk"
+ fi
+done
+
+#echo "prog="$prog 1>&2
+
+umask 077
+(
+$prog '
+BEGIN {
+ prefix = ENVIRON["prefix"];
+ version = ENVIRON["version"];
+ myself = ENVIRON["myself"];
+ while( 1 )
+ {
+ options = ""
+ printf( "\n" ) > "/dev/tty";
+ printf( "---------------------------------------------------------------------\n" ) > "/dev/tty";
+ printf( "\n" ) > "/dev/tty";
+ printf( " MAFFT %s\n", version ) > "/dev/tty";
+ printf( "\n" ) > "/dev/tty";
+ printf( " Copyright (c) 2016 Kazutaka Katoh\n" ) > "/dev/tty";
+ printf( " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)\n" ) > "/dev/tty";
+ printf( " http://mafft.cbrc.jp/alignment/software/\n" ) > "/dev/tty";
+ printf( "---------------------------------------------------------------------\n" ) > "/dev/tty";
+ printf( "\n" ) > "/dev/tty";
+
+ while( 1 )
+ {
+ printf( "\n" ) > "/dev/tty";
+ printf( "Input file? (fasta format)\n@ " ) > "/dev/tty";
+ res = getline < "/dev/tty";
+ close( "/dev/tty" )
+ if( res == 0 || NF == 0 )
+ continue;
+ infile = sprintf( "%s", $0 );
+
+ res = getline < infile;
+ close( infile );
+ if( res == -1 )
+ {
+ printf( "%s: No such file.\n\n", infile ) > "/dev/tty";
+ printf( "Filename extension (eg., .txt) must be typed, if any.\n\n" ) > "/dev/tty";
+ }
+ else if( res == 0 )
+ printf( "%s: Empty.\n", infile ) > "/dev/tty";
+ else
+ {
+ printf( "OK. infile = %s\n\n", infile ) > "/dev/tty";
+ break;
+ }
+ }
+ nseq = 0;
+
+ while( 1 )
+ {
+ printf( "\n" ) > "/dev/tty";
+ printf( "Output file?\n" ) > "/dev/tty";
+ printf( "@ " ) > "/dev/tty";
+ res = getline < "/dev/tty";
+ close( "/dev/tty" );
+ if( res == 0 || NF == 0 )
+ continue;
+ else
+ {
+ outfile = sprintf( "%s", $0 );
+ printf( "OK. outfile = %s\n\n", outfile ) > "/dev/tty";
+ break;
+ }
+ }
+
+ while( 1 )
+ {
+ outargs = "";
+ printf( "\n" ) > "/dev/tty";
+ printf( "Output format?\n" ) > "/dev/tty";
+ printf( " 1. Clustal format / Sorted\n" ) > "/dev/tty";
+ printf( " 2. Clustal format / Input order\n" ) > "/dev/tty";
+ printf( " 3. Fasta format / Sorted\n" ) > "/dev/tty";
+ printf( " 4. Fasta format / Input order\n" ) > "/dev/tty";
+ printf( " 5. Phylip format / Sorted\n" ) > "/dev/tty";
+ printf( " 6. Phylip format / Input order\n" ) > "/dev/tty";
+ printf( "@ " ) > "/dev/tty";
+ res = getline < "/dev/tty";
+ close( "/dev/tty" );
+# printf( "res=%d, NF=%d\n", res, NF );
+
+ resnum = 0 + $1;
+# printf( "resnum=%d\n", resnum );
+
+ if( resnum < 1 || 6 < resnum )
+ continue;
+ else
+ {
+ if( resnum == 1 )
+ outargs = "--clustalout --reorder";
+ else if( resnum == 2 )
+ outargs = "--clustalout --inputorder";
+ else if( resnum == 3 )
+ outargs = "--reorder";
+ else if( resnum == 4 )
+ outargs = "--inputorder";
+ else if( resnum == 5 )
+ outargs = "--phylipout --reorder";
+ else if( resnum == 6 )
+ outargs = "--phylipout --inputorder";
+ else
+ continue;
+ printf( "OK. arguments = %s\n\n", outargs ) > "/dev/tty";
+ break;
+ }
+ }
+
+ while( 1 )
+ {
+ arguments = "";
+ printf( "\n" ) > "/dev/tty";
+ printf( "Strategy?\n" ) > "/dev/tty";
+ printf( " 1. --auto\n" ) > "/dev/tty";
+ printf( " 2. FFT-NS-1 (fast)\n" ) > "/dev/tty";
+ printf( " 3. FFT-NS-2 (default)\n" ) > "/dev/tty";
+ printf( " 4. G-INS-i (accurate)\n" ) > "/dev/tty";
+ printf( " 5. L-INS-i (accurate)\n" ) > "/dev/tty";
+ printf( " 6. E-INS-i (accurate)\n" ) > "/dev/tty";
+ printf( "@ " ) > "/dev/tty";
+ res = getline < "/dev/tty";
+ close( "/dev/tty" );
+# printf( "res=%d, NF=%d\n", res, NF );
+
+ resnum = 0 + $1;
+# printf( "resnum=%d\n", resnum );
+
+ if( resnum < 1 || 6 < resnum )
+ continue;
+ else
+ {
+ if( resnum == 1 )
+ arguments = "--auto";
+ else if( resnum == 2 )
+ arguments = "--retree 1";
+ else if( resnum == 3 )
+ arguments = "--retree 2";
+ else if( resnum == 4 )
+ arguments = "--globalpair --maxiterate 16";
+ else if( resnum == 5 )
+ arguments = "--localpair --maxiterate 16";
+ else if( resnum == 6 )
+ arguments = "--genafpair --maxiterate 16";
+ else
+ arguments = sprintf( "%s", $0 );
+ printf( "OK. arguments = %s %s\n\n", arguments, outargs ) > "/dev/tty";
+ break;
+ }
+ }
+
+
+ while( 1 )
+ {
+ printf( "\n" ) > "/dev/tty";
+ printf( "Additional arguments? (--ep # --op # --kappa # etc)\n" ) > "/dev/tty";
+ printf( "@ " ) > "/dev/tty";
+ res = getline < "/dev/tty";
+ close( "/dev/tty" );
+ if( res == 0 || NF == 0 )
+ {
+ break;
+ }
+ else
+ {
+ addargs = sprintf( "%s", $0 );
+ printf( "OK. arguments = %s %s %s\n\n", addargs, arguments, outargs ) > "/dev/tty";
+ break;
+ }
+ }
+
+ arguments = sprintf( "%s %s %s", addargs, arguments, outargs );
+
+ print ""
+ command = sprintf( "\"%s\" %s \"%s\" > \"%s\"", myself, arguments, infile, outfile );
+ gsub( /\\/, "/", command );
+
+
+ printf( "command=\n%s\n", command ) > "/dev/tty";
+
+
+ while( 1 )
+ {
+ go = 0;
+ printf( "OK?\n" ) > "/dev/tty";
+ printf( "@ [Y] " ) > "/dev/tty";
+ res = getline < "/dev/tty";
+ close( "/dev/tty" );
+ if( res == 0 )
+ continue;
+ else if( NF == 0 || $0 ~ /^[Yy]/ )
+ {
+ go=1;
+ break;
+ }
+ else
+ break;
+ }
+ if( go ) break;
+ printf( "\n" ) > "/dev/tty";
+ printf( "\n" ) > "/dev/tty";
+ }
+ system( command );
+ command = sprintf( "more \"%s\"", outfile );
+ system( command );
+ printf( "Press Enter to exit." ) > "/dev/tty";
+ res = getline < "/dev/tty";
+}
+'
+)
+exit 0;
#define END_OF_VEC -1
-static int maxl;
-static int tsize;
static char outputformat;
-static float lenfaca, lenfacb, lenfacc, lenfacd;
+static int nadd;
#define PLENFACA 0.01
#define PLENFACB 10000
#define PLENFACC 10000
scoremtx = 1;
nblosum = 62;
dorp = NOTSPECIFIED;
+ nadd = 0;
alg = 'X';
while( --argc > 0 && (*++argv)[0] == '-' )
fprintf( stderr, "inputfile = %s\n", inputfile );
--argc;
goto nextoption;
+ case 'I':
+ nadd = myatoi(*++argv);
+ if( nadd == 0 )
+ {
+ fprintf( stderr, "nadd = %d?\n", nadd );
+ exit( 1 );
+ }
+ --argc;
+ goto nextoption;
case 'p':
outputformat = 'p';
break;
table[point]++;
}
-int commonsextet_p( short *table, int *pointt )
+static int localcommonsextet_p( short *table, int *pointt )
{
int value = 0;
short tmp;
static char **name;
static int *nlen;
double *mtxself;
- float score;
+ double score;
static short *table1;
- float longer, shorter;
- float lenfac;
- float bunbo;
+ double longer, shorter;
+ double lenfac;
+ double bunbo;
+ int norg;
arguments( argc, argv );
+
if( inputfile )
{
infp = fopen( inputfile, "r" );
constants( njob, seq );
+
+ if( nadd ) outputformat = 's';
+ norg = njob - nadd;
+
if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
else tsize = (int)pow( 6, 6 );
if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
makecompositiontable_p( table1, pointt[i] );
- score = commonsextet_p( table1, pointt[i] );
+ score = localcommonsextet_p( table1, pointt[i] );
mtxself[i] = score;
free( table1 );
}
fprintf( stderr, "done.\n" );
fprintf( stderr, "\nCalculating i-j scores ... \n" );
if( outputformat == 'p' ) fprintf( stdout, "%-5d", njob );
- for( i=0; i<njob; i++ )
+ for( i=0; i<norg; i++ )
{
if( outputformat == 'p' ) fprintf( stdout, "\n%-9d ", i+1 );
table1 = (short *)calloc( tsize, sizeof( short ) );
makecompositiontable_p( table1, pointt[i] );
- if( outputformat == 'p' ) initj = 0;
- else initj = i+1;
- for( j=initj; j<njob; j++ )
+ if( nadd == 0 )
+ {
+ if( outputformat == 'p' ) initj = 0;
+ else initj = i+1;
+ }
+ else
+ {
+ initj = norg;
+ }
+ for( j=initj; j<njob; j++ )
{
if( nlen[i] > nlen[j] )
{
- longer=(float)nlen[i];
- shorter=(float)nlen[j];
+ longer=(double)nlen[i];
+ shorter=(double)nlen[j];
}
else
{
- longer=(float)nlen[j];
- shorter=(float)nlen[i];
+ longer=(double)nlen[j];
+ shorter=(double)nlen[i];
}
// lenfac = 3.0 / ( LENFACA + LENFACB / ( longer + LENFACC ) + shorter / longer * LENFACD );
lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
// lenfac = 1.0;
// fprintf( stderr, "lenfac = %f (%.0f,%.0f)\n", lenfac, longer, shorter );
- score = commonsextet_p( table1, pointt[j] );
+ score = localcommonsextet_p( table1, pointt[j] );
bunbo = MIN( mtxself[i], mtxself[j] );
if( outputformat == 'p' )
{
fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / bunbo ) * lenfac, nlen[i], nlen[j] );
}
// fprintf( stderr, "##### mtx = %f, mtx[i][0]=%f, mtx[j][0]=%f, bunbo=%f\n", mtx[i][j-i], mtx[i][0], mtx[j][0], bunbo );
-// score = (double)commonsextet_p( table1, pointt[j] );
+// score = (double)localcommonsextet_p( table1, pointt[j] );
// fprintf( stdout, "%d-%d d=%4.2f l=%d,%d\n", i+1, j+1, ( 1.0 - score / MIN( mtxself[i], mtxself[j] ) ) * 3, nlen[i], nlen[j] );
--- /dev/null
+#!/usr/bin/env ruby
+
+localdb = "sp"
+# database name from which homologues are collected
+# by locally installed blast. Leave this if you do
+# not use the '-l' option.
+
+mafftpath = "/usr/local/bin/mafft"
+# path of mafft. "/usr/local/bin/mafft"
+# if mafft is in your command path, "mafft" is ok.
+
+blastpath = "blastall"
+# path of blastall.
+# if blastall is in your command path, "blastall" is ok.
+
+# mafft-homologs.rb v. 2.1 aligns sequences together with homologues
+# automatically collected from SwissProt via NCBI BLAST.
+#
+# mafft > 5.58 is required
+#
+# Usage:
+# mafft-homologs.rb [options] input > output
+# Options:
+# -a # the number of collected sequences (default: 50)
+# -e # threshold value (default: 1e-10)
+# -o "xxx" options for mafft
+# (default: " --op 1.53 --ep 0.123 --maxiterate 1000")
+# -l locally carries out blast searches instead of NCBI blast
+# (requires locally installed blast and a database)
+# -f outputs collected homologues also (default: off)
+# -w entire sequences are subjected to BLAST search
+# (default: well-aligned region only)
+
+#require 'getopts'
+require 'optparse'
+require 'tempfile'
+
+# mktemp
+GC.disable
+temp_vf = Tempfile.new("_vf").path
+temp_if = Tempfile.new("_if").path
+temp_pf = Tempfile.new("_pf").path
+temp_af = Tempfile.new("_af").path
+temp_qf = Tempfile.new("_qf").path
+temp_bf = Tempfile.new("_bf").path
+temp_rid = Tempfile.new("_rid").path
+temp_res = Tempfile.new("_res").path
+
+
+system( mafftpath + " --help > #{temp_vf} 2>&1" )
+pfp = File.open( "#{temp_vf}", 'r' )
+while pfp.gets
+ break if $_ =~ /MAFFT v/
+end
+pfp.close
+
+if( $_ ) then
+ mafftversion = $_.sub( /^\D*/, "" ).split(" ").slice(0).strip.to_s
+else
+ mafftversion = "0"
+end
+if( mafftversion < "5.58" ) then
+ STDERR.puts ""
+ STDERR.puts "======================================================"
+ STDERR.puts "Install new mafft (v. >= 5.58)"
+ STDERR.puts "======================================================"
+ STDERR.puts ""
+ exit
+end
+
+srand ( 0 )
+
+def readfasta( fp, name, seq )
+ nseq = 0
+ tmpseq = ""
+ while fp.gets
+ if $_ =~ /^>/ then
+ name.push( $_.sub(/>/,"").strip )
+ seq.push( tmpseq ) if nseq > 0
+ nseq += 1
+ tmpseq = ""
+ else
+ tmpseq += $_.strip
+ end
+ end
+ seq.push( tmpseq )
+ return nseq
+end
+
+nadd = 50
+eval = 1e-10
+local = 0
+fullout = 0
+entiresearch = 0
+corewin = 50
+corethr = 0.3
+mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder "
+
+
+#if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then
+# puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file"
+# exit
+#end
+params = ARGV.getopts( "sfwlhe:a:o:c:d:" )
+
+
+#if $OPT_c then
+if params["c"] != nil then
+ corewin = params["c"].to_i
+end
+
+#if $OPT_d then
+if params["d"] != nil then
+ corethr = params["d"].to_f
+end
+
+#if $OPT_w
+if params["w"] == true then
+ entiresearch = 1
+end
+
+#if $OPT_f
+if params["f"] == true then
+ fullout = 1
+end
+
+#if $OPT_s
+if params["s"] == true then
+ fullout = 0
+end
+
+#if $OPT_l
+if params["l"] == true then
+ local = 1
+end
+
+#if $OPT_e then
+if params["e"] != nil then
+# eval = $OPT_e.to_f
+ eval = params["e"].to_f
+end
+
+#if $OPT_a then
+if params["a"] != nil then
+ nadd = params["a"].to_i
+end
+
+#if $OPT_o then
+if params["o"] != nil then
+ mafftopt += " " + params["o"] + " "
+end
+
+infn = ARGV[0].to_s.strip
+
+system "cat " + infn + " > #{temp_if}"
+ar = mafftopt.split(" ")
+nar = ar.length
+for i in 0..(nar-1)
+ if ar[i] == "--seed" then
+ system "cat #{ar[i+1]} >> #{temp_if}"
+ end
+end
+
+nseq = 0
+ifp = File.open( "#{temp_if}", 'r' )
+ while ifp.gets
+ nseq += 1 if $_ =~ /^>/
+ end
+ifp.close
+
+if nseq >= 100 then
+ STDERR.puts "The number of input sequences must be <100."
+ exit
+elsif nseq == 1 then
+ system( "cp #{temp_if}" + " #{temp_pf}" )
+else
+ STDERR.puts "Performing preliminary alignment .. "
+ if entiresearch == 1 then
+# system( mafftpath + " --maxiterate 1000 --localpair #{temp_if} > #{temp_pf}" )
+ system( mafftpath + " --maxiterate 0 --retree 2 #{temp_if} > #{temp_pf}" )
+ else
+ system( mafftpath + " --maxiterate 1000 --localpair --core --coreext --corethr #{corethr.to_s} --corewin #{corewin.to_s} #{temp_if} > #{temp_pf}" )
+ end
+end
+
+pfp = File.open( "#{temp_pf}", 'r' )
+inname = []
+inseq = []
+slen = []
+act = []
+nin = 0
+nin = readfasta( pfp, inname, inseq )
+for i in 0..(nin-1)
+ slen.push( inseq[i].gsub(/-/,"").length )
+ act.push( 1 )
+end
+pfp.close
+
+pfp = File.open( "#{temp_if}", 'r' )
+orname = []
+orseq = []
+nin = 0
+nin = readfasta( pfp, orname, orseq )
+pfp.close
+
+allen = inseq[0].length # redundancy filter: number of columns, taken from the first aligned sequence
+for i in 0..(nin-2)
+ for j in (i+1)..(nin-1)
+ next if act[i] == 0 # a sequence that was already deactivated is not compared again
+ next if act[j] == 0
+ pid = 0.0
+ total = 0
+ for a in 0..(allen-1)
+ next if inseq[i][a,1] == "-" || inseq[j][a,1] == "-" # columns with a gap in either sequence are not counted
+ total += 1
+ pid += 1.0 if inseq[i][a,1] == inseq[j][a,1]
+ end
+ pid /= total # identical columns divided by gap-free columns; NaN when total is 0, which fails the test below
+# puts "#{i.to_s}, #{j.to_s}, #{pid.to_s}"
+ if pid > 0.5 then # above 0.5 the pair counts as redundant: the shorter one (ungapped length) is deactivated
+ if slen[i] < slen[j]
+ act[i] = 0
+ else
+ act[j] = 0 # also taken when both lengths are equal
+ end
+ end
+ end
+end
+#p act
+
+
+afp = File.open( "#{temp_af}", 'w' )
+
+STDERR.puts "Searching .. \n"
+ids = []
+add = []
+sco = []
+for i in 0..(nin-1)
+ inseq[i].gsub!(/-/,"")
+ afp.puts ">" + orname[i]
+ afp.puts orseq[i]
+
+# afp.puts ">" + inname[i]
+# afp.puts inseq[i]
+
+ STDERR.puts "Query (#{i+1}/#{nin})\n" + inname[i]
+ if act[i] == 0 then
+ STDERR.puts "Skip.\n\n"
+ next
+ end
+
+ if local == 0 then
+ command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT='" + eval.to_s + "'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put' > #{temp_rid}"
+ system command
+
+ ridp = File.open( "#{temp_rid}", 'r' )
+ while ridp.gets
+ break if $_ =~ / RID = (.*)/
+ end
+ ridp.close
+ rid = $1.strip
+ STDERR.puts "Submitted to NCBI. rid = " + rid
+
+ STDERR.printf "Waiting "
+ while 1
+ STDERR.printf "."
+ sleep 10
+ command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML' > #{temp_res}"
+ system command
+ resp = File.open( "#{temp_res}", 'r' )
+# resp.gets
+# if $_ =~ /WAITING/ then
+# resp.close
+# next
+# end
+ while( resp.gets )
+ break if $_ =~ /QBlastInfoBegin/
+ end
+ resp.gets
+ if $_ =~ /WAITING/ then
+ resp.close
+ next
+ else
+ resp.close
+ break
+ end
+ end
+ else
+# puts "Not supported"
+# exit
+ qfp = File.open( "#{temp_qf}", 'w' )
+ qfp.puts "> "
+ qfp.puts inseq[i]
+ qfp.close
+ command = blastpath + " -p blastp -e #{eval} -b 1000 -m 7 -i #{temp_qf} -d #{localdb} > #{temp_res}"
+ system command
+ resp = File.open( "#{temp_res}", 'r' )
+ end
+ STDERR.puts " Done.\n\n"
+
+ resp = File.open( "#{temp_res}", 'r' )
+ while 1
+ while resp.gets
+ break if $_ =~ /<Hit_id>(.*)<\/Hit_id>/ || $_ =~ /(<Iteration_stat>)/
+ end
+ id = $1
+ break if $_ =~ /<Iteration_stat>/
+# p id
+ while resp.gets
+ break if $_ =~ /<Hsp_bit-score>(.*)<\/Hsp_bit-score>/
+ end
+ score = $1.to_f
+# p score
+
+ known = ids.index( id )
+ if known != nil then
+ if sco[known] >= score then
+ next
+ else
+ ids.delete_at( known )
+ add.delete_at( known )
+ sco.delete_at( known )
+ end
+ end
+ while resp.gets
+ break if $_ =~ /<Hsp_hseq>(.*)<\/Hsp_hseq>/
+ end
+# break if $1 == nil
+ target = $1.sub( /-/, "" ).sub( /U/, "X" )
+# p target
+# STDERR.puts "adding 1 seq"
+ ids.push( id )
+ sco.push( score )
+ add.push( target )
+ end
+ resp.close
+end
+
+n = ids.length
+
+outnum = 0
+while n > 0 && outnum < nadd
+ m = rand( n )
+ afp.puts ">_addedbymaffte_" + ids[m]
+ afp.puts add[m]
+ ids.delete_at( m )
+ add.delete_at( m )
+ n -= 1
+ outnum += 1
+end
+afp.close
+
+STDERR.puts "Performing alignment .. "
+system( mafftpath + mafftopt + " #{temp_af} > #{temp_bf}" )
+STDERR.puts "done."
+
+bfp = File.open( "#{temp_bf}", 'r' )
+outseq = []
+outnam = []
+readfasta( bfp, outnam, outseq )
+bfp.close
+
+outseq2 = []
+outnam2 = []
+
+len = outseq.length
+for i in 0..(len-1)
+# p outnam[i]
+ if fullout == 0 && outnam[i] =~ /_addedbymaffte_/ then
+ next
+ end
+ outseq2.push( outseq[i] )
+ outnam2.push( outnam[i].sub( /_addedbymaffte_/, "_ho_" ) )
+end
+
+nout = outseq2.length
+len = outseq[0].length
+p = len
+while p>0
+ p -= 1
+ allgap = 1
+ for j in 0..(nout-1)
+ if outseq2[j][p,1] != "-" then
+ allgap = 0
+ break
+ end
+ end
+ if allgap == 1 then
+ for j in 0..(nout-1)
+ outseq2[j][p,1] = ""
+ end
+ end
+end
+for i in 0..(nout-1)
+ puts ">" + outnam2[i]
+ puts outseq2[i].gsub( /.{1,60}/, "\\0\n" )
+end
+
+
+system( "rm -rf #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid}" )
+if File.exist?( "#{temp_af}.tree" ) then
+ system( "sed 's/_addedbymaffte_/_ho_/' #{temp_af}.tree > #{ARGV[0].to_s}.tree" )
+ system( "rm #{temp_af}.tree" )
+end
# -w entire sequences are subjected to BLAST search
# (default: well-aligned region only)
-require 'getopts'
+#require 'getopts'
+require 'optparse'
require 'tempfile'
# mktemp
break if $_ =~ /MAFFT v/
end
pfp.close
+
if( $_ ) then
- mafftversion = sub( /^\D*/, "" ).split(" ").slice(0).strip.to_s
+ mafftversion = $_.sub( /^\D*/, "" ).split(" ").slice(0).strip.to_s
else
mafftversion = "0"
end
if( mafftversion < "5.58" ) then
- puts ""
- puts "======================================================"
- puts "Install new mafft (v. >= 5.58)"
- puts "======================================================"
- puts ""
+ STDERR.puts ""
+ STDERR.puts "======================================================"
+ STDERR.puts "Install new mafft (v. >= 5.58)"
+ STDERR.puts "======================================================"
+ STDERR.puts ""
exit
end
corewin = 50
corethr = 0.3
mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder "
-if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then
- puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file"
- exit
-end
-if $OPT_c then
- corewin = $OPT_c.to_i
+
+#if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then
+# puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file"
+# exit
+#end
+params = ARGV.getopts( "sfwlhe:a:o:c:d:" )
+
+
+#if $OPT_c then
+if params["c"] != nil then
+ corewin = params["c"].to_i
end
-if $OPT_d then
- corethr = $OPT_d.to_f
+
+#if $OPT_d then
+if params["d"] != nil then
+ corethr = params["d"].to_f
end
-if $OPT_w
+
+#if $OPT_w
+if params["w"] == true then
entiresearch = 1
end
-if $OPT_f
+
+#if $OPT_f
+if params["f"] == true then
fullout = 1
end
-if $OPT_s
+
+#if $OPT_s
+if params["s"] == true then
fullout = 0
end
-if $OPT_l
+
+#if $OPT_l
+if params["l"] == true then
local = 1
end
-if $OPT_e then
- eval = $OPT_e.to_f
+
+#if $OPT_e then
+if params["e"] != nil then
+# eval = $OPT_e.to_f
+ eval = params["e"].to_f
end
-if $OPT_a then
- nadd = $OPT_a.to_i
+
+#if $OPT_a then
+if params["a"] != nil then
+ nadd = params["a"].to_i
end
-if $OPT_o then
- mafftopt += " " + $OPT_o + " "
+
+#if $OPT_o then
+if params["o"] != nil then
+ mafftopt += " " + params["o"] + " "
end
-system "cat " + ARGV.to_s + " > #{temp_if}"
+infn = ARGV[0].to_s.strip
+
+system "cat " + infn + " > #{temp_if}"
ar = mafftopt.split(" ")
nar = ar.length
for i in 0..(nar-1)
end
if local == 0 then
- command = "lynx -source 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT='" + eval.to_s + "'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put' > #{temp_rid}"
+ command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT='" + eval.to_s + "'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put' > #{temp_rid}"
system command
ridp = File.open( "#{temp_rid}", 'r' )
while 1
STDERR.printf "."
sleep 10
- command = "lynx -source 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML' > #{temp_res}"
+ command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML' > #{temp_res}"
system command
resp = File.open( "#{temp_res}", 'r' )
# resp.gets
system( "rm -rf #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid}" )
+if File.exist?( "#{temp_af}.tree" ) then
+ system( "sed 's/_addedbymaffte_/_ho_/' #{temp_af}.tree > #{ARGV[0].to_s}.tree" )
+ system( "rm #{temp_af}.tree" )
+end
case 'Q':
alg = 'Q';
break;
- case 'H':
- alg = 'H';
- break;
case 'A':
alg = 'A';
break;
tbrweight = 0;
break;
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
--argc;
goto nextoption;
case 'Z':
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
+ kimuraR = myatoi( *++argv );
fprintf( stderr, "kappa = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
fprintf( stderr, "blosum %d\n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = JTT;
fprintf( stderr, "jtt %d\n", pamN );
--argc;
goto nextoption;
case 'm':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = TM;
fprintf( stderr, "tm %d\n", pamN );
int i;
int clus1, clus2;
int s1, s2;
- float pscore;
+ double pscore;
static char **name1, **name2;
double *effarr = eff;
double *effarr1 = NULL;
double *effarr2 = NULL;
static char *indication1, *indication2;
- float dumfl = 0.0;
+// double dumfl = 0.0;
+ double dumdb = 0.0;
int intdum;
#if DEBUG
double time1, time2;
fprintf( stderr, "\n" );
*/
- if( alg == 'Q' )
- fprintf( stderr, "before realignment, score = %f\n", naiveQpairscore( nseq1, nseq2, mseq1, mseq2, effarr1, effarr2, penalty ) );
- else if( alg == 'H' )
- fprintf( stderr, "before realignment, score = %f\n", naivepairscore( nseq1, nseq2, mseq1, mseq2, effarr1, effarr2, penalty ) );
-
commongappick( nseq1, mseq1 );
commongappick( nseq2, mseq2 );
if( use_fft )
{
if( alg == 'M' )
- pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum );
+ pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum );
else
- pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum, NULL, 0, NULL );
+ pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum, NULL, 0, NULL );
}
else
{
if( alg == 'M' )
- pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
- else if( alg == 'H' )
- pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- else if( alg == 'Q' )
- pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+ pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
else
- pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1 );
}
#if DEBUG
time2 = getrusage_sec();
fflush( stdout );
#endif
- if( alg == 'H' )
- fprintf( stderr, "before realignment, score = %f\n", naivepairscore( nseq1, nseq2, mseq1, mseq2, effarr1, effarr2, penalty ) );
- else if( alg == 'Q' )
- fprintf( stderr, "after realignment, score = %f\n", naiveQpairscore( nseq1, nseq2, mseq1, mseq2, effarr1, effarr2, penalty ) );
-
/*
fprintf( stderr, "after align 1 %s \n", indication1 );
--- /dev/null
+#! /usr/bin/env ruby
+require 'optparse'
+
+mafftpath = "/usr/local/bin/mafft"
+
+def cleartempfiles( filenames )
+ for f in filenames
+ system( "rm -rf #{f}" )
+ end
+end
+
+
+seed = 0
+scand = "50%"
+npick = 500
+infn = ""
+reorderoption = "--reorder"
+pickoptions = " --retree 1 "
+coreoptions = " --globalpair --maxiterate 100 "
+corelastarg = " "
+addoptions = " "
+directionoptions = " --retree 0 --pileup "
+markcore = ""
+randompickup = true
+outnum = false
+
+begin
+ params = ARGV.getopts('m:s:n:p:i:C:L:A:o:MhuD:')
+rescue => e
+ STDERR.puts e
+ STDERR.puts "See #{$0} -h"
+ exit 1
+end
+
+#p params
+
+mafftpath = params["m"] if params["m"]
+seed = params["s"].to_i if params["s"]
+scand = params["n"].to_s if params["n"]
+npick = params["p"].to_i if params["p"]
+infn = params["i"] if params["i"]
+#pickoptions += params["P"] if params["P"]
+coreoptions += params["C"] if params["C"] # tsuikagaki!
+corelastarg += params["L"] if params["L"] # tsuikagaki!
+addoptions += params["A"] if params["A"]
+directionoptions += params["D"] if params["D"] # tsuikagaki
+markcore = "*" if params["M"]
+#randompickup = false if params["S"]
+reorderoption = "" if params["o"] =~ /^i/
+outnum = true if params["u"]
+
+if params["h"] then
+ STDERR.puts "Usage: #{$0} -i inputfile [options]"
+ STDERR.puts "Options:"
+ STDERR.puts " -i string Input file."
+ STDERR.puts " -m string Mafft command. Default: mafft"
+ STDERR.puts " -s int Seed. Default:0"
+ STDERR.puts " -n int Number of candidates for core sequences. Default: upper 50% in length"
+ STDERR.puts " -p int Number of core sequences. Default: 500"
+# STDERR.puts " -P \"string\" Mafft options for the PICKUP stage."
+# STDERR.puts " Default: \"--retree 1\""
+# STDERR.puts " -S Tree-based pickup. Default: off"
+ STDERR.puts " -C \"string\" Mafft options for the CORE stage."
+ STDERR.puts " Default: \"--globalpair --maxiterate 100\""
+ STDERR.puts " -A \"string\" Mafft options for the ADD stage."
+ STDERR.puts " Default: \"\""
+ STDERR.puts " -D \"string\" Mafft options for inferring the direction of nucleotide sequences."
+ STDERR.puts " Default: \"\""
+ STDERR.puts " -o r or i r: Reorder the sequences based on similarity. Default"
+ STDERR.puts " i: Same as input."
+ exit 1
+end
+
+if infn == "" then
+ STDERR.puts "Give input file with -i."
+ exit 1
+end
+
+
+
+pid = $$.to_s
+tmpdir = ENV["TMPDIR"]
+tmpdir = "/tmp" if tmpdir == nil
+tempfiles = []
+tempfiles.push( temp_pf = tmpdir + "/_pf" + pid )
+tempfiles.push( temp_nf = tmpdir + "/_nf" + pid )
+tempfiles.push( temp_cf = tmpdir + "/_cf" + pid )
+tempfiles.push( temp_of = tmpdir + "/_of" + pid )
+
+Signal.trap(:INT){cleartempfiles( tempfiles ); exit 1}
+at_exit{ cleartempfiles( tempfiles )}
+
+system "#{mafftpath} --version > #{temp_of} 2>&1"
+
+fp = File.open( temp_of, "r" )
+ line = fp.gets
+fp.close
+
+
+versionnum = line.split(' ')[0].sub(/v/,"").to_f
+
+if versionnum < 7.210 then
+ STDERR.puts "\n"
+ STDERR.puts "Please use mafft version >= 7.210\n"
+ STDERR.puts "\n"
+ exit
+end
+
+srand( seed )
+
+def readfasta( fp, name, seq )
+ nseq = 0
+ tmpseq = ""
+ while fp.gets
+ if $_ =~ /^>/ then
+ name.push( $_.sub(/>/,"").chop )
+ seq.push( tmpseq ) if nseq > 0
+ nseq += 1
+ tmpseq = ""
+ else
+ tmpseq += $_.strip
+ end
+ end
+ seq.push( tmpseq )
+ return nseq
+end
+
+
+
+begin
+ infp = File.open( infn, "r" )
+rescue => e
+ STDERR.puts e
+ exit 1
+end
+infp.close
+
+if directionoptions =~ /--adjustdirection/ then
+ system( mafftpath + "#{directionoptions} #{infn} > #{temp_of}" )
+else
+ system( "cp #{infn} #{temp_of}" )
+end
+
+tname = []
+tseq = []
+infp = File.open( temp_of, "r" )
+tin = readfasta( infp, tname, tseq )
+infp.close
+lenhash = {}
+
+if outnum then
+ for i in 0..(tin-1)
+ tname[i] = "_numo_s_#{i}_numo_e_" + tname[i]
+ end
+end
+
+npick = 0 if npick == 1
+npick = tin if npick > tin
+
+
+if scand =~ /%$/ then
+ ncand = (tin * scand.to_f * 0.01 ).to_i
+else
+ ncand = scand.to_i
+end
+
+if ncand < 0 || ncand > tin then
+ STDERR.puts "Error. -n #{scand}?"
+ exit 1
+end
+
+ncand = npick if ncand < npick
+ncand = tin if ncand > tin
+
+STDERR.puts "ncand = #{ncand}, npick = #{npick}"
+
+
+sai = []
+for i in 0..(tin-1)
+ lenhash[i] = tseq[i].gsub(/-/,"").length
+end
+
+i = 0
+sorted = lenhash.sort_by{|key, value| [-value, i+=1]}
+#for i in 0..(ncand-1)
+# sai[sorted[i][0]] = 1
+#end
+#for i in ncand..(tin-1)
+# sai[sorted[i][0]] = 0
+#end
+
+ncandres = 0 # number of sequences marked as candidates (sai == 1) so far
+ntsukau = 0 # number of sequences whose name starts with _focus_
+for i in 0..(tin-1) # sorted lists the sequences by decreasing ungapped length, ties in input order
+ cand = sorted[i][0] # index of this sequence in tname / tseq
+ if tname[cand] =~ /^_focus_/ then
+ sai[cand] = 0 # kept out of the random pick; _focus_ sequences are set to 1 after it
+ ntsukau += 1
+ elsif ncandres < ncand then # fewer than ncand candidates marked so far
+ unless tname[cand] =~ /^_tsukawanai_/ then
+ sai[cand] = 1
+ ncandres += 1
+ else
+ sai[cand] = 0 # names starting with _tsukawanai_ never become candidates
+ end
+ else
+ sai[cand] = 0 # candidate quota already filled
+ end
+end
+
+if ncandres+ntsukau < npick
+ STDERR.puts "ncandres = #{ncandres}"
+ STDERR.puts "ncand = #{ncand}"
+ STDERR.puts "ntsukau = #{ntsukau}"
+ STDERR.puts "npick = #{npick}"
+ STDERR.puts "Too many _tsukawanai_ sequences."
+ exit 1
+end
+
+if ntsukau > npick
+ STDERR.puts "ntsukau = #{ntsukau}"
+ STDERR.puts "npick = #{npick}"
+ STDERR.puts "Too many _focus_ sequences."
+ exit 1
+end
+
+#p sai
+#for i in 0..(tin-1)
+# puts sai[i].to_s + " " + tname[i]
+#end
+
+npickrand = npick - ntsukau
+
+if randompickup
+ pick = []
+ for i in 0..(npickrand-1)
+ pick[i] = 1
+ end
+ for i in npickrand..(ncandres-1)
+ pick[i] = 0
+ end
+ pick2 = pick.sort_by{rand}
+ pick = pick2
+# p pick
+# p sai
+
+ ipick = 0
+ for i in 0..(tin-1)
+ if sai[i] == 1 then
+ if pick[ipick] == 0 then
+ sai[i] = 0
+ end
+ ipick += 1
+ end
+ end
+# p sai
+
+ for i in 0..(tin-1)
+ if tname[i] =~ /^_focus_/ then
+ sai[i] = 1
+ end
+ end
+# p sai
+
+ pfp = File.open( temp_pf, 'w' )
+ nfp = File.open( temp_nf, 'w' )
+
+ i = 0
+ while i < tin
+ if sai[i] == 1 then
+ pfp.puts ">" + i.to_s + " " + ">" + markcore + tname[i]
+ pfp.puts tseq[i]
+ else
+ nfp.puts ">" + i.to_s + " " + ">" + tname[i]
+ nfp.puts tseq[i]
+ end
+ i += 1
+ end
+
+ nfp.close
+ pfp.close
+
+else # yamerukamo
+ STDERR.puts "Not supported in this version"
+ exit 1
+end
+
+if npick > 1 then
+ if npick < tin then
+ system( mafftpath + " #{coreoptions} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # add de sort
+ else
+ system( mafftpath + " #{coreoptions} #{reorderoption} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # ima sort
+ end
+ res = ( File::stat(temp_cf).size == 0 )
+else
+ system( "cat /dev/null > #{temp_cf}" )
+ res = false
+end
+
+if res == true then
+ STDERR.puts "\n\nError in the core alignment stage.\n\n"
+ exit 1
+end
+
+
+if npick < tin
+ system( mafftpath + " #{addoptions} #{reorderoption} --add #{temp_nf} #{temp_cf} > #{temp_of}" )
+ res = ( File::stat(temp_of).size == 0 )
+else
+ system( "cp #{temp_cf} #{temp_of}" )
+ res = false
+end
+
+if res == true then
+ STDERR.puts "\n\nError in the add stage.\n\n"
+ exit 1
+end
+
+resname = []
+resseq = []
+resfp = File.open( temp_of, "r" )
+nres = readfasta( resfp, resname, resseq )
+resfp.close
+
+if reorderoption =~ /--reorder/ then
+ for i in 0..(nres-1)
+ puts ">" + resname[i].sub(/^[0-9]* >/,"")
+ puts resseq[i]
+ end
+else
+ seqhash = {}
+ namehash = {}
+ seqlast = []
+ namelast = []
+ nlast = 0
+ for i in 0..(nres-1)
+ if resname[i] =~ /^[0-9]* >/
+ key = resname[i].split(' ')[0]
+ seqhash[key] = resseq[i]
+ namehash[key] = resname[i]
+ else
+ seqlast.push( resseq[i] )
+ namelast.push( resname[i] )
+ nlast += 1
+ end
+ end
+ for i in 0..(nlast-1)
+ puts ">" + namelast[i]
+ puts seqlast[i]
+ end
+ for i in 0..(nres-nlast-1)
+ key = i.to_s
+ puts ">" + namehash[key].sub(/^[0-9]* >/,"")
+ puts seqhash[key]
+ end
+end
+
+
--- /dev/null
+#! /usr/bin/env ruby
+require 'optparse'
+
+mafftpath = "_BINDIR/mafft"
+
+def cleartempfiles( filenames )
+ for f in filenames
+ system( "rm -rf #{f}" )
+ end
+end
+
+
+seed = 0
+scand = "50%"
+npick = 500
+infn = ""
+reorderoption = "--reorder"
+pickoptions = " --retree 1 "
+coreoptions = " --globalpair --maxiterate 100 "
+corelastarg = " "
+addoptions = " "
+directionoptions = " --retree 0 --pileup "
+markcore = ""
+randompickup = true
+outnum = false
+
+begin
+ params = ARGV.getopts('m:s:n:p:i:C:L:A:o:MhuD:')
+rescue => e
+ STDERR.puts e
+ STDERR.puts "See #{$0} -h"
+ exit 1
+end
+
+#p params
+
+mafftpath = params["m"] if params["m"]
+seed = params["s"].to_i if params["s"]
+scand = params["n"].to_s if params["n"]
+npick = params["p"].to_i if params["p"]
+infn = params["i"] if params["i"]
+#pickoptions += params["P"] if params["P"]
+coreoptions += params["C"] if params["C"] # tsuikagaki!
+corelastarg += params["L"] if params["L"] # tsuikagaki!
+addoptions += params["A"] if params["A"]
+directionoptions += params["D"] if params["D"] # tsuikagaki
+markcore = "*" if params["M"]
+#randompickup = false if params["S"]
+reorderoption = "" if params["o"] =~ /^i/
+outnum = true if params["u"]
+
+if params["h"] then
+ STDERR.puts "Usage: #{$0} -i inputfile [options]"
+ STDERR.puts "Options:"
+ STDERR.puts " -i string Input file."
+ STDERR.puts " -m string Mafft command. Default: mafft"
+ STDERR.puts " -s int Seed. Default:0"
+ STDERR.puts " -n int Number of candidates for core sequences. Default: upper 50% in length"
+ STDERR.puts " -p int Number of core sequences. Default: 500"
+# STDERR.puts " -P \"string\" Mafft options for the PICKUP stage."
+# STDERR.puts " Default: \"--retree 1\""
+# STDERR.puts " -S Tree-based pickup. Default: off"
+ STDERR.puts " -C \"string\" Mafft options for the CORE stage."
+ STDERR.puts " Default: \"--globalpair --maxiterate 100\""
+ STDERR.puts " -A \"string\" Mafft options for the ADD stage."
+ STDERR.puts " Default: \"\""
+ STDERR.puts " -D \"string\" Mafft options for inferring the direction of nucleotide sequences."
+ STDERR.puts " Default: \"\""
+ STDERR.puts " -o r or i r: Reorder the sequences based on similarity. Default"
+ STDERR.puts " i: Same as input."
+ exit 1
+end
+
+if infn == "" then
+ STDERR.puts "Give input file with -i."
+ exit 1
+end
+
+
+
+pid = $$.to_s
+tmpdir = ENV["TMPDIR"]
+tmpdir = "/tmp" if tmpdir == nil
+tempfiles = []
+tempfiles.push( temp_pf = tmpdir + "/_pf" + pid )
+tempfiles.push( temp_nf = tmpdir + "/_nf" + pid )
+tempfiles.push( temp_cf = tmpdir + "/_cf" + pid )
+tempfiles.push( temp_of = tmpdir + "/_of" + pid )
+
+Signal.trap(:INT){cleartempfiles( tempfiles ); exit 1}
+at_exit{ cleartempfiles( tempfiles )}
+
+system "#{mafftpath} --version > #{temp_of} 2>&1"
+
+fp = File.open( temp_of, "r" )
+ line = fp.gets
+fp.close
+
+
+versionnum = line.split(' ')[0].sub(/v/,"").to_f
+
+if versionnum < 7.210 then
+ STDERR.puts "\n"
+ STDERR.puts "Please use mafft version >= 7.210\n"
+ STDERR.puts "\n"
+ exit
+end
+
+srand( seed )
+
+def readfasta( fp, name, seq )
+ nseq = 0
+ tmpseq = ""
+ while fp.gets
+ if $_ =~ /^>/ then
+ name.push( $_.sub(/>/,"").chop )
+ seq.push( tmpseq ) if nseq > 0
+ nseq += 1
+ tmpseq = ""
+ else
+ tmpseq += $_.strip
+ end
+ end
+ seq.push( tmpseq )
+ return nseq
+end
+
+
+
+begin
+ infp = File.open( infn, "r" )
+rescue => e
+ STDERR.puts e
+ exit 1
+end
+infp.close
+
+if directionoptions =~ /--adjustdirection/ then
+ system( mafftpath + "#{directionoptions} #{infn} > #{temp_of}" )
+else
+ system( "cp #{infn} #{temp_of}" )
+end
+
+tname = []
+tseq = []
+infp = File.open( temp_of, "r" )
+tin = readfasta( infp, tname, tseq )
+infp.close
+lenhash = {}
+
+if outnum then
+ for i in 0..(tin-1)
+ tname[i] = "_numo_s_#{i}_numo_e_" + tname[i]
+ end
+end
+
+npick = 0 if npick == 1
+npick = tin if npick > tin
+
+
+if scand =~ /%$/ then
+ ncand = (tin * scand.to_f * 0.01 ).to_i
+else
+ ncand = scand.to_i
+end
+
+if ncand < 0 || ncand > tin then
+ STDERR.puts "Error. -n #{scand}?"
+ exit 1
+end
+
+ncand = npick if ncand < npick
+ncand = tin if ncand > tin
+
+STDERR.puts "ncand = #{ncand}, npick = #{npick}"
+
+
+sai = []
+for i in 0..(tin-1)
+ lenhash[i] = tseq[i].gsub(/-/,"").length
+end
+
+i = 0
+sorted = lenhash.sort_by{|key, value| [-value, i+=1]}
+#for i in 0..(ncand-1)
+# sai[sorted[i][0]] = 1
+#end
+#for i in ncand..(tin-1)
+# sai[sorted[i][0]] = 0
+#end
+
+ncandres = 0 # number of sequences marked as candidates (sai == 1) so far
+ntsukau = 0 # number of sequences whose name starts with _focus_
+for i in 0..(tin-1) # sorted lists the sequences by decreasing ungapped length, ties in input order
+ cand = sorted[i][0] # index of this sequence in tname / tseq
+ if tname[cand] =~ /^_focus_/ then
+ sai[cand] = 0 # kept out of the random pick; _focus_ sequences are set to 1 after it
+ ntsukau += 1
+ elsif ncandres < ncand then # fewer than ncand candidates marked so far
+ unless tname[cand] =~ /^_tsukawanai_/ then
+ sai[cand] = 1
+ ncandres += 1
+ else
+ sai[cand] = 0 # names starting with _tsukawanai_ never become candidates
+ end
+ else
+ sai[cand] = 0 # candidate quota already filled
+ end
+end
+
+if ncandres+ntsukau < npick
+ STDERR.puts "ncandres = #{ncandres}"
+ STDERR.puts "ncand = #{ncand}"
+ STDERR.puts "ntsukau = #{ntsukau}"
+ STDERR.puts "npick = #{npick}"
+ STDERR.puts "Too many _tsukawanai_ sequences."
+ exit 1
+end
+
+if ntsukau > npick
+ STDERR.puts "ntsukau = #{ntsukau}"
+ STDERR.puts "npick = #{npick}"
+ STDERR.puts "Too many _focus_ sequences."
+ exit 1
+end
+
+#p sai
+#for i in 0..(tin-1)
+# puts sai[i].to_s + " " + tname[i]
+#end
+
+npickrand = npick - ntsukau
+
+if randompickup
+ pick = []
+ for i in 0..(npickrand-1)
+ pick[i] = 1
+ end
+ for i in npickrand..(ncandres-1)
+ pick[i] = 0
+ end
+ pick2 = pick.sort_by{rand}
+ pick = pick2
+# p pick
+# p sai
+
+ ipick = 0
+ for i in 0..(tin-1)
+ if sai[i] == 1 then
+ if pick[ipick] == 0 then
+ sai[i] = 0
+ end
+ ipick += 1
+ end
+ end
+# p sai
+
+ for i in 0..(tin-1)
+ if tname[i] =~ /^_focus_/ then
+ sai[i] = 1
+ end
+ end
+# p sai
+
+ pfp = File.open( temp_pf, 'w' )
+ nfp = File.open( temp_nf, 'w' )
+
+ i = 0
+ while i < tin
+ if sai[i] == 1 then
+ pfp.puts ">" + i.to_s + " " + ">" + markcore + tname[i]
+ pfp.puts tseq[i]
+ else
+ nfp.puts ">" + i.to_s + " " + ">" + tname[i]
+ nfp.puts tseq[i]
+ end
+ i += 1
+ end
+
+ nfp.close
+ pfp.close
+
+else # yamerukamo
+ STDERR.puts "Not supported in this version"
+ exit 1
+end
+
+if npick > 1 then
+ if npick < tin then
+ system( mafftpath + " #{coreoptions} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # add de sort
+ else
+ system( mafftpath + " #{coreoptions} #{reorderoption} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # ima sort
+ end
+ res = ( File::stat(temp_cf).size == 0 )
+else
+ system( "cat /dev/null > #{temp_cf}" )
+ res = false
+end
+
+if res == true then
+ STDERR.puts "\n\nError in the core alignment stage.\n\n"
+ exit 1
+end
+
+
+if npick < tin
+ system( mafftpath + " #{addoptions} #{reorderoption} --add #{temp_nf} #{temp_cf} > #{temp_of}" )
+ res = ( File::stat(temp_of).size == 0 )
+else
+ system( "cp #{temp_cf} #{temp_of}" )
+ res = false
+end
+
+if res == true then
+ STDERR.puts "\n\nError in the add stage.\n\n"
+ exit 1
+end
+
+resname = []
+resseq = []
+resfp = File.open( temp_of, "r" )
+nres = readfasta( resfp, resname, resseq )
+resfp.close
+
+if reorderoption =~ /--reorder/ then
+ for i in 0..(nres-1)
+ puts ">" + resname[i].sub(/^[0-9]* >/,"")
+ puts resseq[i]
+ end
+else
+ seqhash = {}
+ namehash = {}
+ seqlast = []
+ namelast = []
+ nlast = 0
+ for i in 0..(nres-1)
+ if resname[i] =~ /^[0-9]* >/
+ key = resname[i].split(' ')[0]
+ seqhash[key] = resseq[i]
+ namehash[key] = resname[i]
+ else
+ seqlast.push( resseq[i] )
+ namelast.push( resname[i] )
+ nlast += 1
+ end
+ end
+ for i in 0..(nlast-1)
+ puts ">" + namelast[i]
+ puts seqlast[i]
+ end
+ for i in 0..(nres-nlast-1)
+ key = i.to_s
+ puts ">" + namehash[key].sub(/^[0-9]* >/,"")
+ puts seqhash[key]
+ end
+end
+
+
.\" disable justification (adjust text to left margin only)
.ad l
.SH "THIS MANUAL IS FOR V6.2XX (2007)"
-Recent versions (v6.8xx; 2010 Nov.) have more features than those described below.
+Recent versions (v7.1xx; 2013 Jan.) have more features than those described below.
See also the tips page at
http://mafft.cbrc.jp/alignment/software/tips0.html
.SH "NAME"
:_NotX64\r
\r
set ROOTDIR="%~d0%~p0\ms"\r
-set PATH=/bin/\r
+set PATH=/bin/:%PATH%\r
set MAFFT_BINARIES=/lib/mafft\r
+set TMPDIR=%~d0%~p0/ms/tmp\r
\r
%ROOTDIR%\bin\sh %ROOTDIR%\bin\mafft %*\r
\r
--- /dev/null
+@echo off
+
+setlocal
+
+set ROOTDIR="%~d0%~p0\ms"
+set PATH=/bin/:%PATH%
+set MAFFT_BINARIES=/lib/mafft
+set TMPDIR=%~d0%~p0/ms/tmp
+
+%ROOTDIR%\bin\sh %ROOTDIR%\bin\mafft %*
+
+:EOF
--- /dev/null
+extern int disttbfast( int ngui, int lgui, char **namegui, char **seqgui, int argc, char **argv, int (*callback)(int, int, char*));
+#define GUI_ERROR 1
+#define GUI_LENGTHOVER 2
+#define GUI_CANCEL 3
-#! /bin/sh
-
+#! /bin/bash
er=0;
myself=`dirname "$0"`/`basename "$0"`; export myself
-version="v6.857b (2011/05/30)"; export version
+version="v7.310 (2017/Mar/17)"; export version
LANG=C; export LANG
os=`uname`
progname=`basename "$0"`
if [ `echo $os | grep -i cygwin` ]; then
os="cygwin"
+elif [ `echo $os | grep -i mingw` ]; then
+ os="mingw"
elif [ `echo $os | grep -i darwin` ]; then
os="darwin"
elif [ `echo $os | grep -i sunos` ]; then
fi
fi
-if [ ! -x "$prefix/tbfast" ]; then
+if [ -x "$prefix/version" ]; then
+ versionbin=`"$prefix/version"` # for cygwin 2.7
+ else
+ versionbin="0.000"
+fi
+
+if ! expr "$version" : v"$versionbin" > /dev/null ; then
+ echo "" 1>&2
+ echo "v$versionbin != $version" 1>&2
+ echo "" 1>&2
+ echo "There is a problem in the configuration of your shell." 1>&2
+ echo "Check the MAFFT_BINARIES environmental variable by" 1>&2
+ echo "$ echo \$MAFFT_BINARIES" 1>&2
+ echo "" 1>&2
+ echo "This variable must be *unset*, unless you have installed MAFFT" 1>&2
+ echo "with a special configuration. To unset this variable, type" 1>&2
+ echo "$ unset MAFFT_BINARIES" 1>&2
+ echo "or" 1>&2
+ echo "% unsetenv MAFFT_BINARIES" 1>&2
+ echo "Then retry" 1>&2
+ echo "$ mafft input > output" 1>&2
+ echo "" 1>&2
+ echo "To keep this change permanently, edit setting files" 1>&2
+ echo "(.bash_profile, .profile, .cshrc, etc) in your home directory" 1>&2
+ echo "to delete the MAFFT_BINARIES line." 1>&2
+ echo "On MacOSX, also edit or remove the .MacOSX/environment.plist file" 1>&2
+ echo "and then re-login (MacOSX 10.6) or reboot (MacOSX 10.7)." 1>&2
echo "" 1>&2
- echo "correctly installed?" 1>&2
- echo "mafft binaries have to be installed in \$MAFFT_BINARIES" 1>&2
- echo "or the $prefix directory". 1>&2
+ echo "Please send a problem report to kazutaka.katoh@aist.go.jp," 1>&2
+ echo "if this problem remains." 1>&2
echo "" 1>&2
exit 1
er=1
defaultlgop="-2.00"
defaultfft=1
defaultrough=0
-defaultdistance="sixtuples"
+defaultdistance="ktuples"
#defaultdistance="local"
defaultweighti="2.7"
defaultweightr="0.0"
defaultweightm="1.0"
+defaultdafs=0
defaultmccaskill=0
defaultcontrafold=0
defaultalgopt=" "
defaultweightm="2.0"
defaultmccaskill=1
defaultcontrafold=0
+ defaultdafs=0
defaultalgopt=" -A "
defaultalgoptit=" -AB " ## chui
defaultaof="0.0"
defaultsbstmodel=" -b 62 "
defaultkappa=" "
- defaultfmodel=" -a "
+ defaultfmodel=" " # 2013/06/18
elif [ $progname = "qinsi" -o $progname = "mafft-qinsi" ]; then
defaultfft=1
defaultcycle=1
defaultweightm="2.0"
defaultmccaskill=1
defaultcontrafold=0
+ defaultdafs=0
defaultalgopt=" -A "
defaultalgoptit=" -AB " ## chui
defaultaof="0.0"
defaultsbstmodel=" -b 62 "
defaultkappa=" "
- defaultfmodel=" -a "
+ defaultfmodel=" " # 2013/06/18
elif [ $progname = "linsi" -o $progname = "mafft-linsi" ]; then
defaultfft=0
defaultcycle=1
elif [ $progname = "fftns" -o $progname = "mafft-fftns" ]; then
defaultfft=1
defaultcycle=2
- defaultdistance="sixtuples"
+ defaultdistance="ktuples"
elif [ $progname = "fftnsi" -o $progname = "mafft-fftnsi" ]; then
defaultfft=1
defaultcycle=2
defaultiterate=2
- defaultdistance="sixtuples"
+ defaultdistance="ktuples"
elif [ $progname = "nwns" -o $progname = "mafft-nwns" ]; then
defaultfft=0
defaultcycle=2
- defaultdistance="sixtuples"
+ defaultdistance="ktuples"
elif [ $progname = "nwnsi" -o $progname = "mafft-nwnsi" ]; then
defaultfft=0
defaultcycle=2
defaultiterate=2
- defaultdistance="sixtuples"
+ defaultdistance="ktuples"
fi
outputfile=""
-namelength=15
+namelength=-1
anysymbol=0
parallelizationstrategy="BAATARI2"
kappa=$defaultkappa
sbstmodel=$defaultsbstmodel
fmodel=$defaultfmodel
+nmodel=" "
gop=$defaultgop
+gopdist=$defaultgop
aof=$defaultaof
cycle=$defaultcycle
iterate=$defaultiterate
weightr=$defaultweightr
weightm=$defaultweightm
rnaalifold=0
+dafs=$defaultdafs
mccaskill=$defaultmccaskill
contrafold=$defaultcontrafold
-quiet=0
+progressfile="/dev/stderr"
debug=0
sw=0
algopt=$defaultalgopt
algoptit=$defaultalgoptit
+#algspecified=0
+pairspecified=0
scorecalcopt=" "
coreout=0
corethr="0.5"
corewin="100"
coreext=" "
outputformat="pir"
+f2clext="-N"
outorder="input"
seed="x"
seedtable="x"
auto=0
groupsize=-1
partsize=50
-partdist="sixtuples"
+partdist="ktuples"
partorderopt=" -x "
treeout=0
distout=0
treeinopt=" "
seedfiles="/dev/null"
seedtablefile="/dev/null"
+pdblist="/dev/null"
+ownlist="/dev/null"
+strdir="$PWD"
aamatrix="/dev/null"
treeinfile="/dev/null"
rnascoremtx=" "
laraparams="/dev/null"
foldalignopt=" "
-treealg=" -X "
+treealg=" -X 0.1 "
+sueff="1.0"
scoreoutarg=" "
numthreads=0
+numthreadsit=-1
+numthreadstb=-1
randomseed=0
addfile="/dev/null"
addarg0=" "
+addarg=" "
+addsinglearg=" "
+add2ndhalfarg=" "
+mapoutfile="/dev/null"
+fragment=0
+legacygapopt=" "
+mergetable="/dev/null"
+mergearg=" "
+seedoffset=0
outnum=" "
+last_e=5000
+last_m=3
+last_subopt=" "
+last_once=" "
+adjustdirection=0
+tuplesize=6
+termgapopt=" -O "
+#termgapopt=" " # gap/gap ga kakenai node
+similarityoffset="0.0"
+unalignlevel="0.0"
+unalignspecified=0
+spfactor="100.0"
+shiftpenaltyspecified=0
+opdistspecified=0
+allowshift=0
+enrich=0
+enrichseq=0
+enrichstr=0
+seektarget=""
+fixthreshold="0.0"
+bunkatsuopt=" "
+npickup=0
+minimumweight="0.00001" # 2016/Mar
+usenaivepairscore=" "
+oldgenafparam=0
+sprigorous=0
+pileuporshuffle="l"
+initialramusage="20GB"
+focusarg=" "
if [ $# -gt 0 ]; then
+ if [ "$1" = "--version" ]; then
+ echo "$version" 1>&2
+ exit 0;
+ elif [ "$1" = "--help" -o "$1" = "--info" ]; then
+ shift
+ er=1;
+ fi
while [ $# -gt 1 ];
do
if [ "$1" = "--auto" ]; then
partorderopt=" -x "
elif [ "$1" = "--unweight" ]; then
weightopt=" -u "
+ elif [ "$1" = "--termgappenalty" ]; then
+ termgapopt=" "
+ elif [ "$1" = "--alga" ]; then
+ algopt=" "
+ algoptit=" "
+# algspecified=1
elif [ "$1" = "--algq" ]; then
algopt=" -Q "
- algoptit=" -QB "
+ algoptit=" "
+ echo "" 1>&2
+ echo "--algq is no longer supported!" 1>&2
+ echo "" 1>&2
+ exit 1;
+# algspecified=1
elif [ "$1" = "--namelength" ]; then
shift
+ # Require an unsigned integer *before* doing arithmetic on it, so that
+ # expr never sees a non-numeric operand; exit non-zero on bad input.
+ if ! expr "$1" : '[0-9][0-9]*$' > /dev/null ; then
+ echo "Specify the length of name in clustal format output!" 1>&2
+ exit 1
+ fi
namelength=`expr "$1" - 0`
elif [ "$1" = "--groupsize" ]; then
shift
+ # Same validation as above: the whole argument must be digits.
+ if ! expr "$1" : '[0-9][0-9]*$' > /dev/null ; then
+ echo "Specify groupsize!" 1>&2
+ exit 1
+ fi
groupsize=`expr "$1" - 0`
elif [ "$1" = "--partsize" ]; then
shift
+ # Same validation as above: the whole argument must be digits.
+ if ! expr "$1" : '[0-9][0-9]*$' > /dev/null ; then
+ echo "Specify partsize!" 1>&2
+ exit 1
+ fi
partsize=`expr "$1" - 0`
elif [ "$1" = "--parttree" ]; then
distance="parttree"
- partdist="sixtuples"
+ partdist="ktuples"
elif [ "$1" = "--dpparttree" ]; then
distance="parttree"
partdist="localalign"
distout=1
elif [ "$1" = "--fastswpair" ]; then
distance="fasta"
+ pairspecified=1
sw=1
elif [ "$1" = "--fastapair" ]; then
distance="fasta"
+ pairspecified=1
sw=0
elif [ "$1" = "--averagelinkage" ]; then
- treealg=" -E "
+ treealg=" -X 1.0 "
+ sueff="1.0"
elif [ "$1" = "--minimumlinkage" ]; then
- treealg=" -q "
+ treealg=" -X 0.0 "
+ sueff="0.0"
+ elif [ "$1" = "--mixedlinkage" ]; then
+ shift
+ sueff="$1"
+ treealg=" -X $1"
elif [ "$1" = "--noscore" ]; then
scorecalcopt=" -Z "
+ elif [ "$1" = "--6mermultipair" ]; then
+ distance="ktuplesmulti"
+ tuplesize=6
+ pairspecified=1
+ elif [ "$1" = "--10mermultipair" ]; then
+ distance="ktuplesmulti"
+ tuplesize=10
+ pairspecified=1
elif [ "$1" = "--6merpair" ]; then
- distance="sixtuples"
+ distance="ktuples"
+ tuplesize=6
+ pairspecified=1
+ elif [ "$1" = "--10merpair" ]; then
+ distance="ktuples"
+ tuplesize=10
+ pairspecified=1
elif [ "$1" = "--blastpair" ]; then
distance="blast"
+ pairspecified=1
+ elif [ "$1" = "--lastmultipair" ]; then
+ distance="lastmulti"
+ pairspecified=1
elif [ "$1" = "--globalpair" ]; then
distance="global"
+ pairspecified=1
+ elif [ "$1" = "--shortlongpair" ]; then
+ # --shortlongpair and --longshortpair set exactly the same variables here:
+ # local pairwise distance, the -Z pair-score flag, and zeroed offset/extension values.
+ distance="local"
+ usenaivepairscore="-Z"
+ laof=0.0 # used in the addfull case.
+ lexp=0.0 # used in the addfull case.
+ pgaof=0.0 # not needed for local
+ pgexp=0.0 # not needed for local
+ pairspecified=1
+ elif [ "$1" = "--longshortpair" ]; then
+ distance="local"
+ usenaivepairscore="-Z"
+ laof=0.0 # used in the addfull case.
+ lexp=0.0 # used in the addfull case.
+ pgaof=0.0 # not needed for local
+ pgexp=0.0 # not needed for local
+ pairspecified=1
elif [ "$1" = "--localpair" ]; then
distance="local"
+ pairspecified=1
+ elif [ "$1" = "--lastpair" ]; then
+ distance="last"
+ pairspecified=1
+ elif [ "$1" = "--multipair" ]; then
+ distance="multi"
+ pairspecified=1
+ elif [ "$1" = "--hybridpair" ]; then
+ distance="hybrid"
+ pairspecified=1
elif [ "$1" = "--scarnapair" ]; then
distance="scarna"
+ pairspecified=1
+ elif [ "$1" = "--dafspair" ]; then
+ distance="dafs"
+ pairspecified=1
elif [ "$1" = "--larapair" ]; then
distance="lara"
+ pairspecified=1
elif [ "$1" = "--slarapair" ]; then
distance="slara"
+ pairspecified=1
elif [ "$1" = "--foldalignpair" ]; then
distance="foldalignlocal"
+ pairspecified=1
elif [ "$1" = "--foldalignlocalpair" ]; then
distance="foldalignlocal"
+ pairspecified=1
elif [ "$1" = "--foldalignglobalpair" ]; then
distance="foldalignglobal"
+ pairspecified=1
elif [ "$1" = "--globalgenafpair" ]; then
distance="globalgenaf"
+ pairspecified=1
+ echo "" 1>&2
+ echo "--globalgenaf is no longer supported!" 1>&2
+ echo "" 1>&2
+ exit 1;
elif [ "$1" = "--localgenafpair" ]; then
distance="localgenaf"
+ pairspecified=1
elif [ "$1" = "--genafpair" ]; then
distance="localgenaf"
+ pairspecified=1
+ elif [ "$1" = "--oldgenafpair" ]; then
+ distance="localgenaf"
+ pairspecified=1
+ oldgenafparam=1
elif [ "$1" = "--memsave" ]; then
memopt=" -M -B " # -B (bunkatsunashi no riyu ga omoidasenai)
elif [ "$1" = "--nomemsave" ]; then
elif [ "$1" = "--nofft" ]; then
fft=0
elif [ "$1" = "--quiet" ]; then
- quiet=1
+ if [ $os = "mingw" ]; then
+ progressfile="nul"
+ else
+ progressfile="/dev/null"
+ fi
elif [ "$1" = "--debug" ]; then
debug=1
elif [ "$1" = "--coreext" ]; then
coreext=" -c "
elif [ "$1" = "--core" ]; then
coreout=1
+ elif [ "$1" = "--adjustdirection" ]; then
+ adjustdirection=1
+ elif [ "$1" = "--adjustdirectionaccurately" ]; then
+ adjustdirection=2
+ elif [ "$1" = "--progress" ]; then
+ shift
+ progressfile="$1"
+ # Accept only names that begin with '/' or with a letter followed by ':'
+ # (e.g. C:); anything else is rejected with a non-zero exit status.
+ if ! ( expr "$progressfile" : "\/" > /dev/null || expr "$progressfile" : "[A-Za-z]\:" > /dev/null ) ; then
+ echo "Specify a progress file name with the absolute path!" 1>&2
+ exit 1
+ fi
elif [ "$1" = "--out" ]; then
shift
outputfile="$1"
elif [ "$1" = "--thread" ]; then
shift
+ # The argument must start with a digit or '-' (negative values request
+ # auto-detection of the core count later in the script).
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads. Or, use --thread -1" 1>&2
+ exit 1
+ fi
numthreads=`expr "$1" - 0`
+ elif [ "$1" = "--threadtb" ]; then
+ shift
+ # Thread count stored in numthreadstb; a negative value falls back to
+ # the --thread value later in the script.
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads for the progressive step!" 1>&2
+ exit 1
+ fi
+ numthreadstb=`expr "$1" - 0`
+ elif [ "$1" = "--threadit" ]; then
+ shift
+ # Thread count for iterative refinement, stored in numthreadsit.
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads for the iterative step!" 1>&2
+ exit 1
+ fi
+ numthreadsit=`expr "$1" - 0`
+ elif [ "$1" = "--last_subopt" ]; then
+ last_subopt="-S"
+ elif [ "$1" = "--last_once" ]; then
+ last_once="-U"
+ elif [ "$1" = "--last_m" ]; then
+ shift
+ last_m=`expr "$1" - 0`
+ elif [ "$1" = "--last_e" ]; then
+ shift
+ last_e=`expr "$1" - 0`
elif [ "$1" = "--randomseed" ]; then
shift
randomseed=`expr "$1" - 0`
elif [ "$1" = "--scoreout" ]; then
scoreoutarg="-S -B"
elif [ "$1" = "--outnum" ]; then
- scoreoutarg="-n"
+ outnum="-n"
+ elif [ "$1" = "--leavegappyregion" ]; then
+ legacygapopt="-L"
+ elif [ "$1" = "--legacygappenalty" ]; then
+ legacygapopt="-L"
+ elif [ "$1" = "--merge" ]; then
+ shift
+ mergetable="$1"
+ # The table file must exist; report the problem and fail with a
+ # non-zero status (a bare 'exit' would return echo's status, i.e. 0).
+ if [ ! -e "$mergetable" ]; then
+ echo "Cannot open $mergetable" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
elif [ "$1" = "--addprofile" ]; then
shift
addarg0="-I"
shift
addarg0="-K -I"
addfile="$1"
+ elif [ "$1" = "--addfragments" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=1
+ elif [ "$1" = "--addfull" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=-1
+ elif [ "$1" = "--addlong" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=-2
+ elif [ "$1" = "--smoothing" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -p "
+ elif [ "$1" = "--keeplength" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -Y "
+ elif [ "$1" = "--mapout" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -Z -Y "
+ elif [ "$1" = "--mapoutfile" ]; then
+ shift
+ add2ndhalfarg=$add2ndhalfarg" -Z -Y "
+ mapoutfile="$1"
elif [ "$1" = "--maxiterate" ]; then
shift
+ # Require an unsigned integer *before* doing arithmetic on it, so that
+ # expr never sees a non-numeric operand; exit non-zero on bad input.
+ if ! expr "$1" : '[0-9][0-9]*$' > /dev/null ; then
+ echo "Specify the number of iterations!" 1>&2
+ exit 1
+ fi
iterate=`expr "$1" - 0`
elif [ "$1" = "--retree" ]; then
shift
+ # Same validation as above: the whole argument must be digits.
+ if ! expr "$1" : '[0-9][0-9]*$' > /dev/null ; then
+ echo "Specify the number of tree rebuilding!" 1>&2
+ exit 1
+ fi
cycle=`expr "$1" - 0`
+ elif [ "$1" = "--text" ]; then
+ sbstmodel=" -b -2 -a "
+ f2clext="-E"
+ seqtype="-P"
+ fft=0
elif [ "$1" = "--aamatrix" ]; then
shift
sbstmodel=" -b -1 "
aamatrix="$1"
+ # The matrix file must exist; fail with a non-zero status otherwise
+ # (a bare 'exit' would return echo's status, i.e. 0).
+ if [ ! -e "$aamatrix" ]; then
+ echo "Cannot open $aamatrix" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
elif [ "$1" = "--treein" ]; then
shift
treeinopt=" -U "
treein=1
treeinfile="$1"
+ # The guide tree file must exist; fail with a non-zero status otherwise.
+ if [ ! -e "$treeinfile" ]; then
+ echo "Cannot open $treeinfile" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ elif [ "$1" = "--pileup" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="p"
+ elif [ "$1" = "--randomchain" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="s"
elif [ "$1" = "--topin" ]; then
shift
treeinopt=" -V "
echo "There was a bug in version < 6.530." 1>&2
echo "This bug has not yet been fixed." 1>&2
exit 1
+ elif [ "$1" = "--memsavetree" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="C"
+ elif [ "$1" = "--memsavetreex" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="c"
+ elif [ "$1" = "--initialramusage" ]; then
+ shift
+ treeinopt=" -U "
+ treein=1
+ initialramusage="$1"
+ pileuporshuffle="c"
elif [ "$1" = "--kappa" ]; then
shift
kappa=" -k $1 "
+ # The value must start with a digit; fail with a non-zero status
+ # otherwise (a bare 'exit' would return echo's status, i.e. 0).
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify kappa value!" 1>&2
+ exit 1
+ fi
elif [ "$1" = "--fmodel" ]; then
fmodel=" -a "
+ elif [ "$1" = "--nwildcard" ]; then
+ nmodel=" -: "
+ elif [ "$1" = "--nzero" ]; then
+ nmodel=" "
elif [ "$1" = "--jtt" ]; then
shift
sbstmodel=" -j $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
elif [ "$1" = "--kimura" ]; then
shift
sbstmodel=" -j $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
elif [ "$1" = "--tm" ]; then
shift
sbstmodel=" -m $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
elif [ "$1" = "--bl" ]; then
shift
sbstmodel=" -b $1"
+ # The value must start with a digit; fail with a non-zero status
+ # otherwise (a bare 'exit' would return echo's status, i.e. 0).
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "blosum $1?" 1>&2
+ exit 1
+ fi
elif [ "$1" = "--weighti" ]; then
shift
weighti="$1"
+ # Each weight must start with a digit; bad input ends the script with
+ # a non-zero status (a bare 'exit' would return echo's status, i.e. 0).
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weighti value!" 1>&2
+ exit 1
+ fi
elif [ "$1" = "--weightr" ]; then
shift
weightr="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weightr value!" 1>&2
+ exit 1
+ fi
elif [ "$1" = "--weightm" ]; then
shift
weightm="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weightm value!" 1>&2
+ exit 1
+ fi
elif [ "$1" = "--rnaalifold" ]; then
rnaalifold=1
elif [ "$1" = "--mccaskill" ]; then
mccaskill=1
contrafold=0
+ dafs=0
elif [ "$1" = "--contrafold" ]; then
mccaskill=0
contrafold=1
+ dafs=0
+ elif [ "$1" = "--dafs" ]; then
+ mccaskill=0
+ contrafold=0
+ dafs=1
elif [ "$1" = "--ribosum" ]; then
rnascoremtx=" -s "
elif [ "$1" = "--op" ]; then
shift
gop="$1"
+ # The value must start with a digit; bad input ends the script with a
+ # non-zero status (a bare 'exit' would return echo's status, i.e. 0).
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify op!" 1>&2
+ exit 1
+ fi
+ elif [ "$1" = "--opdist" ]; then
+ shift
+ gopdist="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify opdist!" 1>&2
+ exit 1
+ fi
+ # Remember that gopdist was given explicitly, so it is not later
+ # overwritten with the value of gop.
+ opdistspecified=1
+ elif [ "$1" = "--allowshift" ]; then
+ allowshift=1
+ elif [ "$1" = "--shiftpenalty" ]; then
+ shift
+ spfactor="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify sf!" 1>&2
+ exit 1
+ fi
+ # Remember that spfactor was given explicitly, so --allowshift does
+ # not replace it with its own default.
+ shiftpenaltyspecified=1
elif [ "$1" = "--ep" ]; then
shift
- aof="$1"
+# aof="$1"
+ tmpval="$1"
+ aof=`awk "BEGIN{ print -1.0 * \"$tmpval\"}"`
+ if ! expr "$aof" : "[0-9\-]" > /dev/null ; then
+ printf "\nSpecify a number for ep, like --ep 0.1\n" 1>&2
+ printf "'$1' cannot be interpreted as a number..\n\n" 1>&2
+ exit
+ fi
elif [ "$1" = "--rop" ]; then
shift
rgop="$1"
+# TODO: validate this value (original note: "check later").
elif [ "$1" = "--rep" ]; then
shift
rgep="$1"
elif [ "$1" = "--corewin" ]; then
shift
corewin="$1"
+ elif [ "$1" = "--strdir" ]; then
+ shift
+ strdir="$1"
+ elif [ "$1" = "--pdbidlist" ]; then
+ shift
+ pdblist="$1"
+ # The list file must exist; fail with a non-zero status otherwise
+ # (a bare 'exit' would return echo's status, i.e. 0).
+ if [ ! -e "$pdblist" ]; then
+ echo "Cannot open $pdblist" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ elif [ "$1" = "--pdbfilelist" ]; then
+ shift
+ ownlist="$1"
+ # The list file must exist; fail with a non-zero status otherwise.
+ if [ ! -e "$ownlist" ]; then
+ echo "Cannot open $ownlist" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ elif [ "$1" = "--enrich" ]; then
+ enrich=1
+ enrichseq=1
+ enrichstr=1
+ seektarget=""
+ elif [ "$1" = "--enrichseq" ]; then
+ enrich=1
+ enrichseq=1
+ enrichstr=0
+ seektarget="-seq"
+ elif [ "$1" = "--enrichstr" ]; then
+ enrich=1
+ enrichseq=0
+ enrichstr=1
+ seektarget="-str"
elif [ "$1" = "--seedtable" ]; then
shift
seedtable="y"
shift
seed="m"
seedfiles="$seedfiles $1"
+ elif [ "$1" = "--minimumweight" ]; then
+ shift
+ minimumweight="$1"
+ elif [ "$1" = "--similaritylevel" ]; then
+ shift
+ similarityoffset="$1"
+ elif [ "$1" = "--unalignlevel" ]; then
+ shift
+ unalignlevel="$1"
+ unalignspecified=1
+ elif [ "$1" = "--skipiterate" ]; then
+ shift
+ fixthreshold="$1"
+ elif [ "$1" = "--bunkatsunashi" ]; then
+ bunkatsuopt=" -B "
+ elif [ "$1" = "--sp" ]; then
+ sprigorous=1
+ elif [ "$1" = "--focus" ]; then
+ focusarg=" -= "
+ elif [ "$1" = "--sparsepickup" ]; then
+ shift
+ npickup="$1"
elif [ $progname = "fftns" -o $progname = "nwns" ]; then
if [ "$1" -gt 0 ]; then
cycle=`expr "$1" - 0`
else
echo "Unknown option: $1" 1>&2
er=1;
+# exit 1;
fi
shift
done;
+ echo "" 1>"$progressfile"
# TMPFILE=/tmp/$progname.$$
TMPFILE=`mktemp -dt $progname.XXXXXXXXXX`
echo "mktemp seems to be obsolete. Re-trying without -t" 1>&2
TMPFILE=`mktemp -d /tmp/$progname.XXXXXXXXXX`
fi
+
+# if [ $os = "cygwin" ]; then
+# TMPFILE=`cygpath -w $TMPFILE` unnecessary for cygwin2.7
+# fi
+
umask 077
# mkdir $TMPFILE || er=1
if [ $debug -eq 1 ]; then
- trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0
+# trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 # does not work in msys
+ trap "tar cfv - $TMPFILE | gzip -c > debuginfo.tgz; rm -rf $TMPFILE " 0
else
- trap "rm -rf $TMPFILE " 0
+ trap "rm -rf $TMPFILE" 0
fi
if [ $# -eq 1 ]; then
if [ -r "$1" -o "$1" = - ]; then
printf '';
else
echo "$0": Cannot open "$addfile". 1>&2
+ echo "" 1>&2
exit 1;
fi
cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> $TMPFILE/infile
cat "$addfile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_addfile
cat "$aamatrix" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_aamtx
+ cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_subalignmentstable
cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_guidetree
cat "$seedtablefile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_seedtablefile
cat "$laraparams" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_lara.params
+ cat "$pdblist" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/pdblist
+ cat "$ownlist" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/ownlist
+
# echo $seedfiles
infilename="$1"
seedfilesintmp="/dev/null"
cat "$1" | tr "\r" "\n" > $TMPFILE/seed$#
else
echo "$0": Cannot open "$1". 1>&2
+ echo "" 1>&2
exit 1;
fi
seednseq=$seednseq" "`grep -c '^[>|=]' $TMPFILE/seed$#`
else
echo "$0": Cannot open "$1". 1>&2
+ echo "" 1>&2
er=1
# exit 1;
fi
else
- echo '$#'"=$#" 1>&2
+# echo '$#'"=$#" 1>&2
er=1
fi
- if [ $os != "linux" ]; then
- numthreads=0
+
+ if [ $numthreads -lt 0 ]; then
+ # A negative thread count requests auto-detection of the number of
+ # physical cores; the method depends on the platform stored in $os.
+ if [ $os = "linux" ]; then
+ nlogicalcore=`cat /proc/cpuinfo | grep "^processor" | uniq | wc -l`
+ ncoresinacpu=`cat /proc/cpuinfo | grep 'cpu cores' | uniq | awk '{print $4}'`
+ nphysicalcpu=`cat /proc/cpuinfo | grep 'physical id' | sort | uniq | wc -l`
+ if [ $nlogicalcore -eq 0 ]; then
+ echo "Cannot get the number of processors from /proc/cpuinfo" 1>>"$progressfile"
+ exit 1
+ fi
+ # Prefer cores-per-cpu times number of cpus; fall back to the
+ # logical processor count when either figure is unavailable.
+ if [ ${#ncoresinacpu} -gt 0 -a $nphysicalcpu -gt 0 ]; then
+ numthreads=`expr $ncoresinacpu '*' $nphysicalcpu`
+# if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading
+# numthreads=`expr $numthreads '+' 1`
+# fi
+ else
+ numthreads=$nlogicalcore
+ fi
+ elif [ $os = "darwin" ]; then
+ numthreads=`sysctl -n hw.physicalcpu`
+ # Quoted so that an empty result is tested as an empty string.
+ if [ -z "$numthreads" ]; then
+ echo "Cannot get the number of physical cores from sysctl" 1>>"$progressfile"
+ exit 1
+ fi
+# nlogicalcore=`sysctl -n hw.logicalcpu`
+# if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading
+# numthreads=`expr $numthreads '+' 1`
+# fi
+ elif [ $os = "mingw" -o $os = "cygwin" ]; then
+ # wmic output may carry carriage returns; drop them so the value
+ # is a plain number.
+ numthreads=`wmic cpu get NumberOfCores | tr -d '\r' | head -2 | tail -1 | awk '{print $1}'`
+ else
+ echo "Cannot count the number of physical cores." 1>>"$progressfile"
+ exit 1
+ fi
+ # Whatever the platform, the result must be an unsigned integer;
+ # otherwise the integer comparisons that follow would fail.
+ if ! expr "X$numthreads" : 'X *[0-9][0-9]* *$' > /dev/null ; then
+ echo "Cannot count the number of physical cores." 1>>"$progressfile"
+ exit 1
+ fi
+ echo "OS = "$os 1>>"$progressfile"
+ echo "The number of physical cores = " $numthreads 1>>"$progressfile"
+ fi
+
+ # A negative numthreadstb means "not specified": inherit the general thread count.
+ if [ $numthreadstb -lt 0 ]; then numthreadstb=$numthreads; fi
+
+ # A negative numthreadsit means "not specified": inherit the general
+ # thread count, but never use more than 10 threads by default.
+ if [ $numthreadsit -lt 0 ]; then
+ if ! [ $numthreads -lt 11 ]; then
+ numthreadsit=10
+ else
+ numthreadsit=$numthreads
+ fi
fi
- if [ $numthreads -eq 0 -a $parallelizationstrategy = "BESTFIRST" ]; then
+ if [ $numthreadsit -eq 0 -a $parallelizationstrategy = "BESTFIRST" ]; then
echo 'Impossible' 1>&2;
exit 1;
fi
+ if [ "$addarg0" != " " ]; then
+ iterate=0 # 2013/03/23
+ "$prefix/countlen" < $TMPFILE/_addfile > $TMPFILE/addsize 2>>"$progressfile"
+ nadd=`awk '{print $1}' $TMPFILE/addsize`
+ if [ $nadd -eq "0" ]; then
+ echo Check $addfile 1>&2
+ exit 1;
+ fi
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ echo 'Impossible' 1>&2;
+ echo 'Use either ONE of --seed, --seedtable, --addprofile and --add.' 1>&2
+ exit 1;
+ fi
+ else
+ nadd="0"
+ fi
+
if [ $auto -eq 1 ]; then
- "$prefix/countlen" < $TMPFILE/infile > $TMPFILE/size
+ "$prefix/countlen" < $TMPFILE/infile > $TMPFILE/size 2>>"$progressfile"
nseq=`awk '{print $1}' $TMPFILE/size`
nlen=`awk '{print $3}' $TMPFILE/size`
- if [ $nlen -lt 2000 -a $nseq -lt 100 ]; then
+
+ if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then
distance="local"
iterate=1000
+ cycle=1
+ elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then
+ distance="local"
+ iterate=2
+ cycle=1
elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then
- distance="sixtuples"
+ distance="ktuples"
iterate=2
- else
- distance="sixtuples"
+ cycle=2
+ elif [ $nseq -lt 50000 ]; then # changed from 10000 2014/Oct/4
+ distance="ktuples"
+ iterate=0
+ cycle=2
+ elif [ $nseq -lt 90000 ]; then # changed from 30000 2014/Oct/4
+ distance="ktuples"
iterate=0
+ cycle=1
+ elif [ $nlen -lt 3000 ]; then
+ distance="parttree"
+ partdist="localalign"
+ algopt=" "
+ algoptit=" "
+# algspecified=1
+ cycle=1
+ else
+ distance="parttree"
+ partdist="ktuples"
+ algopt=" "
+ algoptit=" "
+# algspecified=1
+ cycle=1
fi
- if [ $quiet -eq 0 ]; then
- echo "nseq = " $nseq 1>&2
- echo "nlen = " $nlen 1>&2
- echo "distance = " $distance 1>&2
- echo "iterate = " $iterate 1>&2
+
+
+# if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then
+# distance="local"
+# iterate=1000
+# cycle=1
+# elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then
+# distance="local"
+# iterate=2
+# cycle=1
+# elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then
+# distance="ktuples"
+# iterate=2
+# cycle=2
+# elif [ $nseq -lt 200000 ]; then
+# distance="ktuples"
+# iterate=0
+# treeinopt=" -U "
+# treein=1
+# pileuporshuffle="a"
+# elif [ $nlen -lt 3000 ]; then
+# distance="parttree"
+# partdist="localalign"
+# algopt=" "
+# algoptit=" "
+## algspecified=1
+# cycle=1
+# else
+# distance="parttree"
+# partdist="ktuples"
+# algopt=" "
+# algoptit=" "
+## algspecified=1
+# cycle=1
+# fi
+
+
+ if [ $fragment -ne 0 ]; then
+ norg=`expr $nseq '-' $nadd`
+ npair=`expr $norg '*' $nadd`
+ echo "nadd = " $nadd 1>>"$progressfile"
+ echo "npair = " $npair 1>>"$progressfile"
+ echo "nseq = " $nseq 1>>"$progressfile"
+ echo "nlen = " $nlen 1>>"$progressfile"
+# nagasa check!
+#
+ if [ $npair -gt 10000000 -o $nlen -gt 500000 ]; then # 2015/Jun
+ distance="ktuples"
+ echo "use ktuples, size=$tuplesize!" 1>>"$progressfile"
+ elif [ $npair -gt 3000000 -o $nlen -gt 100000 ]; then # 2015/Jun
+ distance="multi"
+ weighti="0.0"
+ echo "use multipair, weighti=0.0!" 1>>"$progressfile"
+ else
+ distance="multi"
+ echo "use multipair, weighti=$weighti!" 1>>"$progressfile"
+ fi
+ pairspecified=1
fi
fi
+ if [ `awk "BEGIN {print( 0.0+\"$sueff\" < 0.0 || 0.0+\"$sueff\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The argument of --mixedlinkage must be between 0.0 and 1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+
+ if [ $allowshift -eq 1 ]; then
+ if [ $unalignspecified -ne 1 ]; then
+ unalignlevel="0.8"
+ fi
+ if [ $shiftpenaltyspecified -ne 1 ]; then
+ spfactor="2.00"
+ fi
+ fi
+
+ if [ $opdistspecified -ne 1 ]; then
+ gopdist=$gop
+ fi
+
+ if [ $unalignlevel != "0.0" -o `awk "BEGIN {print( 0.0+\"$spfactor\" < 100.0 )}"` -gt 0 ]; then
+ nmodel=" -: "
+ termgapopt=" "
+ if [ $distance = "localgenaf" ]; then
+ printf "\n%s\n" "The combination of --allowshift and --genafpair (E-INS-i/-1) is not supported." 1>>"$progressfile"
+ printf "%s\n" "Instead, please try --allowshift --globalpair (G-INS-i/-1 in the web version)," 1>>"$progressfile"
+ printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i/-1), too." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $distance != "global" -o `awk "BEGIN {print( 0.0+\"$weighti\" < 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is supported only with the --globalpair option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $fragment -ne 0 ]; then
+ printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is not supported with the --addfragments option." 1>>"$progressfile"
+ exit 1;
+ fi
+ fi
+
+ # Abort when the shift penalty factor (spfactor) is numerically below 1.0.
+ # NOTE(review): the message says ">1" although exactly 1.0 passes this test — confirm which bound is intended.
+ if [ `awk "BEGIN {print( 0.0+\"$spfactor\" < 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n" "shiftpenalty must be >1." 1>>"$progressfile"
+ exit 1;
+ fi
+
+ if [ `awk "BEGIN {print( 0.0+\"$fixthreshold\" < 0.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The 'fix' parameter must be >= 0.0" 1>>"$progressfile"
+ exit 1;
+ fi
+
+ if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" < 0.0 || 0.0+\"$unalignlevel\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The 'unalignlevel' parameter must be between 0.0 and 1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" > 0.0 )}"` -gt 0 ]; then
+ laof="0"
+ lexp="0"
+ pgaof="0"
+ pgexp="0"
+ LEXP="0"
+ GEXP="0"
+ termgapopt=" "
+# if [ $auto -eq 1 -o $fragment -ne 0 -o $iterate -gt 0 ]; then
+ if [ $fragment -ne 0 ]; then
+ printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported with the --addfragments option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $distance = "parttree" ]; then
+ printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported in the (dp)parttree option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $distance = "localgenaf" ]; then
+ printf "\n%s\n" "The --genafpair is not supported in the 'unalignlevel > 0' mode." 1>>"$progressfile"
+ printf "%s\n" "Instead, please try --unalignlevel xx --globalpair," 1>>"$progressfile"
+ printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i), too." 1>>"$progressfile"
+ exit 1;
+ fi
+# if [ $distance != "ktuples" -a `awk "BEGIN {print( 0.0+\"$weighti\" > 0.0 )}"` -gt 0 -a $iterate -gt 0 ]; then
+# printf "\n%s\n\n" "Please add --weighti 0.0, for now." 1>>"$progressfile"
+# exit 1;
+# fi
+ fi
+
+ if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" != 0.0 && 0.0+\"$unalignlevel\" != 0.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "Do not simultaneously specify --similaritylevel and --unalignlevel" 1>>"$progressfile"
+ exit 1;
+ fi
+
+ if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" < -1.0 || 0.0+\"$similarityoffset\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "Similarity must be between -1.0 and +1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+ aof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $aof}"`
+ laof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $laof}"`
+ pgaof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $pgaof}"`
+
+
if [ $parallelizationstrategy = "BESTFIRST" -o $parallelizationstrategy = "BAATARI0" ]; then
iteratelimit=254
else
rnaopt=" -e $rgep -o $rgop -c $weightm -r $weightr -R $rnascoremtx "
# rnaoptit=" -o $rgop -BT -c $weightm -r $weightr -R "
rnaoptit=" -o $rgop -F -c $weightm -r $weightr -R "
- elif [ $mccaskill -eq 1 -o $contrafold -eq 1 ]; then
+ elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $contrafold -eq 1 ]; then
rnaopt=" -o $rgop -c $weightm -r $weightr "
# rnaoptit=" -e $rgep -o $rgop -BT -c $weightm -r $weightr $rnascoremtx "
rnaoptit=" -e $rgep -o $rgop -F -c $weightm -r $weightr $rnascoremtx "
rnaoptit=" -F "
fi
- model="$sbstmodel $kappa $fmodel"
+# if [ $algspecified -eq 0 ]; then
+# if [ $distance = "parttree" ]; then
+# algopt=" -Q "
+# algoptit=" "
+# else
+# algopt=" "
+# algoptit=" "
+# fi
+# fi
+
+ # Handling of --sp (sprigorous=1): pass -@ as the algorithm option, clear
+ # termgapopt, switch fft off and set memopt to -N. Iterative refinement
+ # is accepted in this mode only when numthreadsit is 0.
+ if [ $sprigorous -eq 1 ]; then
+ algopt=" -@ "
+ if [ $iterate -gt 0 ]; then
+ if [ $numthreadsit -eq 0 ]; then
+ algoptit=" -@ -B -Z -z 1000 "
+ else
+ echo "" 1>>"$progressfile"
+ echo "At present, the combination of --sp and iterative refinement is supported only in a single thread." 1>>"$progressfile"
+ echo "Please try \"--thread -1 --threadit 0\", which runs the iterative refinement calculation on a single thread." 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ exit 1;
+# algoptit=" -@ -B -z 1000 "
+ fi
+ fi
+ termgapopt=" "
+ fft=0
+ memopt=" -N "
+ fi
+
+ model="$sbstmodel $kappa $fmodel $nmodel"
if [ $er -eq 1 ]; then
echo "------------------------------------------------------------------------------" 1>&2
# echo "" 1>&2
# echo " Usage: `basename $0` [options] inputfile > outputfile" 1>&2
echo " http://mafft.cbrc.jp/alignment/software/" 1>&2
- echo " NAR 30:3059-3066 (2002), Briefings in Bioinformatics 9:286-298 (2008)" 1>&2
+ echo " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)" 1>&2
# echo "------------------------------------------------------------------------------" 1>&2
# echo " % mafft in > out" 1>&2
echo "------------------------------------------------------------------------------" 1>&2
echo "--clustalout : Output: clustal format, default: fasta" 1>&2
echo "--reorder : Outorder: aligned, default: input order" 1>&2
echo "--quiet : Do not report progress" 1>&2
- echo "--thread # : Number of threads. (# must be <= number of physical cores - 1)" 1>&2
+ echo "--thread # : Number of threads (if unsure, --thread -1)" 1>&2
# echo "" 1>&2
# echo " % mafft --maxiterate 1000 --localpair in > out (L-INS-i)" 1>&2
# echo " most accurate in many cases, assumes only one alignable domain" 1>&2
exit 1
fi
fi
+ if [ $distance = "last" -o $distance = "lastmulti" ]; then
+ if [ ! -x "$prefix/lastal" -o ! -x "$prefix/lastdb" ]; then
+ echo "" 1>&2
+ echo "== Install LAST ============================================================" 1>&2
+ echo "LAST (Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487) is required." 1>&2
+ echo "http://last.cbrc.jp/" 1>&2
+ echo "http://mafft.cbrc.jp/alignment/software/xxxxxxx.html " 1>&2
+ echo "============================================================================" 1>&2
+ echo "" 1>&2
+ exit 1
+ fi
+ fi
if [ $distance = "lara" -o $distance = "slara" ]; then
if [ ! -x "$prefix/mafft_lara" ]; then
echo "" 1>&2
exit 1
fi
fi
- if [ $distance = "scarna" ]; then
+ if [ $distance = "scarna" -o $mccaskill -eq 1 ]; then
if [ ! -x "$prefix/mxscarnamod" ]; then
echo "" 1>&2
echo "== Install MXSCARNA ======================================================" 1>&2
echo "MXSCARNA (Tabei et al. BMC Bioinformatics 2008 9:33) is required." 1>&2
echo "Please 'make' at the 'extensions' directory of the MAFFT source package," 1>&2
echo "which contains the modified version of MXSCARNA." 1>&2
- echo "http://align.bmr.kyushu-u.ac.jp/mafft/software/source.html " 1>&2
+ echo "http://mafft.cbrc.jp/alignment/software/source.html " 1>&2
echo "==========================================================================" 1>&2
echo "" 1>&2
exit 1
fi
fi
- if [ $mccaskill -eq 1 ]; then
- if [ ! -x "$prefix/mxscarnamod" ]; then
+ if [ $distance = "dafs" -o $dafs -eq 1 ]; then
+ if [ ! -x "$prefix/dafs" ]; then
echo "" 1>&2
- echo "== Install MXSCARNA ======================================================" 1>&2
- echo "MXSCARNA (Tabei et al. BMC Bioinformatics 2008 9:33) is required." 1>&2
- echo "Please 'make' at the 'extensions' directory of the MAFFT source package," 1>&2
- echo "which contains the modified version of MXSCARNA." 1>&2
- echo "http://align.bmr.kyushu-u.ac.jp/mafft/software/source.html " 1>&2
+ echo "== Install DAFS===========================================================" 1>&2
+ echo "DAFS (Sato et al. Journal 2012 issue:page) is required." 1>&2
+ echo "http://www.ncrna.org/ " 1>&2
echo "==========================================================================" 1>&2
echo "" 1>&2
exit 1
if [ $cycle -eq 0 ]; then
treeoutopt="-t -T"
iterate=0
- if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
- distance="distonly"
- fi
+ weighti="0.0" # 2016Jul31, tbfast.c kara idou
+# if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # 2012/04, localpair --> local alignment distance
+# if [ $distance = "global" ]; then
+# distance="distonly"
+# fi
if [ $treeout -eq 1 ]; then
parttreeoutopt="-t"
groupsize=1
fi
if [ $distout -eq 1 ]; then
distoutopt="-y -T"
+ if [ $treeout -eq 0 ]; then
+ treeoutopt=""
+ fi
fi
else
if [ $treeout -eq 1 ]; then
cycle=3
fi
- if [ $nseq -gt 4000 -a $iterate -gt 1 ]; then
+ if [ $nseq -gt 60000 -a $iterate -gt 1 ]; then # 2014/Oct/22, test
echo "Too many sequences to perform iterative refinement!" 1>&2
echo "Please use a progressive method." 1>&2
exit 1
fi
+ if [ $distance = "lastmulti" -o $distance = "multi" ]; then
+ if [ $fragment -eq 0 ]; then
+ echo 'Specify --addfragments too' 1>&2
+ exit 1
+ fi
+ fi
+
+ if [ $fragment -ne 0 ]; then
+ if [ $pairspecified -eq 0 ]; then
+ distance="multi"
+ fi
+ if [ $distance != "multi" -a $distance != "hybrid" -a $distance != "lastmulti" -a $distance != "local" -a $distance != "last" -a $distance != "ktuples" -a $distance != "ktuplesmulti" ]; then
+ echo 'Specify --multipair, --lastmultipair, --lastpair, --localpair, --6merpair, --6mermultipair or --hybridpair' 1>&2
+ exit 1
+ fi
+ fi
+
+ if [ "$memopt" = " -M -B " -a "$distance" != "ktuples" ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+
+# Sanity checks and split options that apply only when distance=parttree.
+ if [ $distance = "parttree" ]; then
+# Reject parttree together with a merge table.
+ if [ $mergetable != "/dev/null" ]; then
+ echo "The combination of (dp)parttree and merge is Impossible. " 1>&2
+ exit 1
+ fi
+# Reject parttree together with an add file.
+ if [ $addfile != "/dev/null" ]; then
+ echo "The combination of (dp)parttree and add(fragments) is Impossible. " 1>&2
+ exit 1
+ fi
+# Reject parttree together with seed or seedtable ("x" is the unset value tested here).
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+# Reject parttree when more than one iteration is requested.
+ if [ $iterate -gt 1 ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+# NOTE(review): this conditional is redundant; outorder is set to "input"
+# unconditionally right after it.
+ if [ $outorder = "aligned" ]; then
+ outorder="input"
+ fi
+ outorder="input" # partorder takes effect
+# Choose the split option from partdist; the localalign and fasta modes also force cycle=1.
+ if [ $partdist = "localalign" ]; then
+ splitopt=" -U " # -U -l -> fast
+ cycle=1
+ elif [ $partdist = "fasta" ]; then
+ splitopt=" -S "
+ cycle=1
+ else
+ splitopt=" "
+ fi
+ fi
+# Select localparam, weighti and cycle from the distance method:
+#   ktuples/ktuplesmulti with no seed, seedtable, ownlist, pdblist or enrichstr
+#       -> empty localparam, weighti 0.0
+#   ktuples/ktuplesmulti with any of those given
+#       -> cycle raised to at least 2, localparam "-l $weighti"
+#   parttree -> empty localparam, weighti 0.0, cycle=1 when groupsize > -1
+#   anything else -> localparam "-B -l $weighti", cycle capped at 1
- if [ $distance = "sixtuples" -a \( $seed = "x" -a $seedtable = "x" \) ]; then
- localparam=" "
- elif [ $distance = "sixtuples" -a \( $seed != "x" -o $seedtable != "x" \) ]; then
+ if [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed = "x" -a $seedtable = "x" -a $ownlist = "/dev/null" -a $pdblist = "/dev/null" -a $enrichstr -eq 0 \) ]; then
+ localparam=""
+ weighti="0.0"
+ elif [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed != "x" -o $seedtable != "x" -o $ownlist != "/dev/null" -o $pdblist != "/dev/null" -o $enrichstr -eq 1 \) ]; then
if [ $cycle -lt 2 ]; then
- cycle=2 # nazeda
+ cycle=2 # disttbfast does not support seed # caution 2014Aug21
fi
if [ $iterate -lt 2 ]; then
echo "############################################################################" 1>&2
fi
localparam="-l "$weighti
elif [ $distance = "parttree" ]; then
- localparam=" "
+ localparam=""
+ weighti="0.0"
if [ $groupsize -gt -1 ]; then
cycle=1
fi
else
- localparam=" -l "$weighti
+ localparam="-B -l "$weighti # no splitting even when weighti=0
if [ $cycle -gt 1 ]; then # 09/01/08
cycle=1
fi
fi
+
if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
aof="0.000"
- fi
-
- if [ "$memopt" = " -M -B " -a "$distance" != "sixtuples" ]; then
- echo "Impossible" 1>&2
- exit 1
- fi
-#exit
-
- if [ $distance = "parttree" ]; then
- if [ $seed != "x" -o $seedtable != "x" ]; then
- echo "Impossible" 1>&2
- exit 1
- fi
- if [ $iterate -gt 1 ]; then
- echo "Impossible" 1>&2
- exit 1
- fi
- if [ $outorder = "aligned" ]; then
- outorder="input"
- fi
- outorder="input" # partorder ga kiku
- if [ $partdist = "localalign" ]; then
- splitopt=" -L " # -L -l -> fast
- elif [ $partdist = "fasta" ]; then
- splitopt=" -S "
- else
- splitopt=" "
+ if [ $oldgenafparam -ne 1 ]; then
+ laof="0.0"
+ lexp="0.0"
+# LEXP="0.0" # default = 0.0
+ usenaivepairscore="-Z"
fi
fi
echo 'Use either one of seedtable and seed. Not both.' 1>&2
exit 1
fi
-# if [ $seedtable != "x" -a $anysymbol -gt 0 ]; then
-# echo 'The combination of --seedtable and --anysymbol is not supported.' 1>&2
-# exit 1
-# fi
+ if [ $f2clext = "-E" -a $anysymbol -gt 0 ]; then
+ echo '' 1>&2
+ echo 'At present, the combination of --text and ( --anysymbol or --preservecase ) is impossible.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
+
+ if [ $f2clext = "-E" -a $aamatrix != "/dev/null" ]; then
+ echo '' 1>&2
+ echo 'At present, the combination of --text and (--aamatrix) is impossible.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
if [ $treein -eq 1 ]; then
# if [ $iterate -gt 0 ]; then
# echo 'Not supported yet.' 1>&2
# exit 1
# fi
- cycle=1
+# When $TMPFILE/_guidetree is missing or empty, write a one-line directive
+# into it according to $pileuporshuffle. Only distance=ktuples is accepted here.
+ if [ ! -s $TMPFILE/_guidetree ]; then
+ if [ $distance != "ktuples" ]; then
+ echo "Not supported yet" 1>>"$progressfile"
+ exit 1
+ fi
+ if [ $pileuporshuffle = "p" ]; then
+ echo "pileup" > $TMPFILE/_guidetree
+# weightopt=" -u " -> disttbfast.c?
+# numthreadstb=0 -> disttbfast.c
+ cycle=1 # specifies disttbfast (translated; presumably selects the disttbfast path, confirm)
+ elif [ $pileuporshuffle = "s" ]; then
+ echo "shuffle $randomseed" > $TMPFILE/_guidetree
+# numthreadstb=0 -> disttbfast.c
+# weightopt=" -u " -> disttbfast.c?
+ cycle=1 # also specified in disttbfast.c (translated; confirm)
+ elif [ $pileuporshuffle = "C" ]; then
+ echo "very compact" > $TMPFILE/_guidetree
+ elif [ $pileuporshuffle = "c" ]; then
+ echo "compact " "$initialramusage" > $TMPFILE/_guidetree
+ elif [ $pileuporshuffle = "a" ]; then
+ echo "auto $randomseed 200" > $TMPFILE/_guidetree
+ fi
+ fi
fi
- if [ "$addarg0" != " " ]; then
- "$prefix/countlen" < $TMPFILE/_addfile > $TMPFILE/addsize
- nadd=`awk '{print $1}' $TMPFILE/addsize`
- if [ $nadd -eq "0" ]; then
- echo Check $addfile 1>&2
- exit 1;
+ if [ $nadd -gt "0" ]; then
+ if [ $fragment -eq "1" ]; then
+ addarg="$addarg0 $nadd -g -0.01"
+ addsinglearg=""
+ cycle=1 # chuui 2014Aug25
+ elif [ $fragment -eq "-1" ]; then
+ addarg="$addarg0 $nadd"
+ addsinglearg="-V" # allowlongadds, 2014/04/02
+ cycle=1 # chuui 2014Aug25
+ elif [ $fragment -eq "-2" ]; then
+ addarg="$addarg0 $nadd"
+ addsinglearg="-V" # allowlongadds + smoothing
+ add2ndhalfarg=$add2ndhalfarg" -p "
+ cycle=1 # chuui 2014Aug25
+ usenaivepairscore="-Z" # 2015Jun01
+ laof=0.0 # 2015Jun01
+ lexp=0.0 # 2015Jun01
+ else
+ addarg="$addarg0 $nadd"
+ addsinglearg=""
fi
- addarg="$addarg0 $nadd"
- cycle=1
+
+# cycle=1 # chuui 2014Aug19
iterate=0
- if [ $seed != "x" -o $seedtable != "x" ]; then
- echo 'Impossible' 1>&2;
- echo 'Use either ONE of --seed, --seedtable, --addprofile and --add.' 1>&2
- exit 1;
+# treealg=" -q " ## 2012/01/24 ## removed 2012/02/06
+ fi
+
+
+# Split $cycle into cycletbfast and cycledisttbfast. The first branch is taken
+# when localparam is empty, no fragment mode is active and distance is not parttree.
+ if [ -z "$localparam" -a $fragment -eq 0 -a $distance != "parttree" ]; then
+# echo "use disttbfast"
+# echo cycle = $cycle
+ cycletbfast=1 # do not run tbfast
+ cycledisttbfast=$cycle # pass cycle to disttbfast via -E
+ if [ $cycledisttbfast -eq 0 ]; then # used by --treeout
+ cycledisttbfast=1
fi
+ else
+# echo "use tbfast"
+# echo cycle = $cycle
+ cycletbfast=$cycle # run if 1 or more
+ cycledisttbfast=1 # disttbfast runs only once
fi
- if [ $mccaskill -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
- if [ $distance = "sixtuples" ]; then
+# echo localparam=
+# echo $localparam
+# echo cycletbfast=
+# echo $cycletbfast
+# echo cycledisttbfast=
+# echo $cycledisttbfast
+
+#exit
+
+ if [ $adjustdirection -gt 0 -a $seed != "x" ]; then
+ echo '' 1>&2
+ echo 'The combination of --adjustdirection(accurately) and --seed is not supported.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
+
+
+ if [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
+ if [ $distance = "ktuples" ]; then
echo 'Not supported.' 1>&2
- echo 'Please add --globalpair, --localpair, --scarnapair,' 1>&2
+ echo 'Please add --globalpair, --localpair, --scarnapair, --dafspair' 1>&2
echo '--larapair, --slarapair, --foldalignlocalpair or --foldalignglobalpair' 1>&2
exit 1
fi
+ if [ $f2clext = "-E" ]; then
+ echo '' 1>&2
+ echo 'For RNA alignment, the --text mode is impossible.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
fi
- if [ $mccaskill -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
- if [ $distance = "scarna" -o $distance = "lara" -o $distance = "slara" -o $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then
- strategy="X-I"
+# cycle is modified later, so the strategy name is decided here.
+# from here (start of the strategy-naming section)
+ if [ $pileuporshuffle = "p" ]; then
+ strategy="Pileup-"
+ elif [ $pileuporshuffle = "s" ]; then
+ strategy="Randomchain-"
+ elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
+ if [ $distance = "scarna" -o $distance = "dafs" -o $distance = "lara" -o $distance = "slara" -o $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then
+ strategy="X-"
elif [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # each alternative must compare $distance; a bare "globalgenaf" operand is always true
- strategy="Q-I"
+ strategy="Q-"
fi
elif [ $distance = "fasta" -a $sw -eq 0 ]; then
- strategy="F-I"
+ strategy="F-"
elif [ $distance = "fasta" -a $sw -eq 1 ]; then
- strategy="H-I"
+ strategy="H-"
elif [ $distance = "blast" ]; then
- strategy="B-I"
+ strategy="B-"
elif [ $distance = "global" -o $distance = "distonly" ]; then
- strategy="G-I"
+ strategy="G-"
elif [ $distance = "local" ]; then
- strategy="L-I"
+ strategy="L-"
+ elif [ $distance = "last" ]; then
+ strategy="Last-"
+ elif [ $distance = "hybrid" ]; then
+ strategy="Hybrid-"
+ elif [ $distance = "multi" ]; then
+ strategy="Multi-"
+ elif [ $distance = "lastmulti" ]; then
+ strategy="LastMulti-"
elif [ $distance = "localgenaf" ]; then
- strategy="E-I"
+ strategy="E-"
elif [ $distance = "globalgenaf" ]; then
- strategy="K-I"
+ strategy="K-"
elif [ $fft -eq 1 ]; then
strategy="FFT-"
else
strategy="NW-"
fi
+# if [ `echo "$weighti>0.0" | bc` -gt 0 ]; then
+ if [ `awk "BEGIN {print(0.0+\"$weighti\">0.0)}"` -gt 0 ]; then
+ strategy=$strategy"I"
+ fi
strategy=$strategy"NS-"
if [ $iterate -gt 0 ]; then
strategy=$strategy"i"
else
strategy=$strategy"PartTree-"$cycle
fi
+ elif [ $fragment -eq 1 ]; then
+ strategy=$strategy"fragment"
+ elif [ $fragment -eq -1 ]; then
+ strategy=$strategy"full"
+ elif [ $fragment -eq -2 ]; then
+ strategy=$strategy"long"
else
strategy=$strategy$cycle
fi
fi
if [ $outputformat = "clustal" -a $outorder = "aligned" ]; then
- outputopt=" -c $strategy -r $TMPFILE/order "
+ outputopt=" -c $strategy -r $TMPFILE/order $f2clext "
elif [ $outputformat = "clustal" -a $outorder = "input" ]; then
- outputopt=" -c $strategy "
+ outputopt=" -c $strategy $f2clext "
elif [ $outputformat = "phylip" -a $outorder = "aligned" ]; then
outputopt=" -y -r $TMPFILE/order "
elif [ $outputformat = "phylip" -a $outorder = "input" ]; then
else
outputopt="null"
fi
+# up to here (end of the strategy-naming section)
+
+
- (
- cd $TMPFILE;
+# ( # 2017/Mar/17
+ pushd $TMPFILE > /dev/null;
- if [ $quiet -gt 0 ]; then
+ cat /dev/null > pre
- if [ $anysymbol -eq 1 ]; then
- mv infile orig
- "$prefix/replaceu" -i orig > infile 2>/dev/null || exit 1
+# echo "nseq = " $nseq 1>>"$progressfile"
+# echo "distance = " $distance 1>>"$progressfile"
+# echo "iterate = " $iterate 1>>"$progressfile"
+# echo "cycle = " $cycle 1>>"$progressfile"
+
+ if [ $anysymbol -eq 1 ]; then
+ mv infile orig
+ "$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1
+ fi
+
+ if [ $mergetable != "/dev/null" ]; then
+ if [ $nadd -gt "0" ]; then
+ echo "Impossible" 1>&2
+ exit 1
fi
+# if [ $seed != "x" -o $seedtable != "x" ]; then
+# echo "This version does not support the combination of merge and seed." 1>&2
+# exit 1
+# fi
+# iterate=0 # 2013/04/16
+ mergearg="-H $seedoffset"
+ fi
- if [ $seed != "x" ]; then
- mv infile infile2
- if [ $anysymbol -eq 1 ]; then
- mv orig orig2
- cat /dev/null > orig
- fi
- cat /dev/null > infile
- cat /dev/null > hat3.seed
- seedoffset=0
-# echo "seednseq="$seednseq
-# echo "seedoffset="$seedoffset
- set $seednseq > /dev/null
-# echo $#
- while [ $# -gt 1 ]
- do
- shift
-# echo "num="$#
-
- if [ $anysymbol -eq 1 ]; then
- cat seed$# >> orig
- "$prefix/replaceu" -i seed$# -o $seedoffset > clean 2>/dev/null || exit 1
- mv clean seed$#
- fi
- "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile 2>/dev/null || exit 1
- cat hat3 >> hat3.seed
-# echo "$1"
- seedoffset=`expr $seedoffset + $1`
-# echo "$1"
-# echo "seedoffset="$seedoffset
- done;
-# echo "seedoffset="$seedoffset
- if [ $anysymbol -eq 1 ]; then
- "$prefix/replaceu" -i orig2 -o $seedoffset >> infile 2>/dev/null || exit 1 # yarinaoshi
- cat orig2 >> orig
- else
- cat infile2 >> infile
- fi
- elif [ $seedtable != "x" ]; then
- cat _seedtablefile > hat3.seed
+ if [ $adjustdirection -gt 0 ]; then
+ if [ $fragment -ne 0 ]; then
+ fragarg="-F" #
else
- cat /dev/null > hat3.seed
+ fragarg="-F" # 2014/02/06, do not consider other additional sequences, even in the case of --add
fi
-# cat hat3.seed
+ if [ $adjustdirection -eq 1 ]; then
+ "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 5000 -o a > _direction 2>>"$progressfile"
+ elif [ $adjustdirection -eq 2 ]; then
+ "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 100 -o a -d > _direction 2>>"$progressfile"
+ fi
+ "$prefix/setdirection" $mergearg -d _direction -i infile > infiled 2>>"$progressfile" || exit
+ mv infiled infile
+ if [ $anysymbol -eq 1 ]; then
+ "$prefix/setdirection" $mergearg -d _direction -i orig -r > origd 2>>"$progressfile" || exit
+ mv origd orig
+ fi
+ fi
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ echo "The combination of --seed and (--pdbidlist or --pdbfilelist) is impossible." 1>>"$progressfile"
+ exit 1
+ fi
+ if [ $enrich -eq 1 ]; then
+ echo "The combination of --seed and (--enrich, --enrichseq or --enrichstr) is impossible at present." 1>>"$progressfile"
+ exit 1
+ fi
+ fi
- if [ $mccaskill -eq 1 ]; then
- "$prefix/mccaskillwrap" -C $numthreads -d "$prefix" -i infile > hat4 2>/dev/null || exit 1
- elif [ $contrafold -eq 1 ]; then
- "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 2>/dev/null || exit 1
+ if [ $enrich -eq 1 ]; then
+ if [ $ownlist != "/dev/null" ]; then
+ echo "Warning: Sequence homologs of the structures given with the --pdbfilelist option cannot be collected." 1>>"$progressfile"
fi
- if [ $distance = "fasta" ]; then
- "$prefix/dndfast7" $swopt < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "blast" ]; then
- "$prefix/dndblast" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "foldalignlocal" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -h $laof -H -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "foldalignglobal" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "slara" ]; then
- "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -T -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "lara" ]; then
- "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -B -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "scarna" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -s -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "global" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -F < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "local" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -L < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "globalgenaf" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -O $GGOP -E $GEXP -K < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "localgenaf" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -N < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "distonly" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -t < infile > /dev/null 2>&1 || exit 1
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "parttree" ]; then
- "$prefix/splittbfast" -Q $splitopt $partorderopt $parttreeoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre 2>/dev/null || exit 1
- mv hat3.seed hat3
+ echo "SEEKQUENCER (http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/) is" 1>>"$progressfile"
+ if [ $pdblist != "/dev/null" ]; then
+ echo "collecting homologs of the input sequences and the structures given with the --pdbidlist option." 1>>"$progressfile"
+ perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -idf pdblist -out seekout -mod mafftash-split 2>>"seekerr"
+ seekres="$?"
else
- "$prefix/disttbfast" -O $outnum $addarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $algopt $treealg $scoreoutarg < infile > pre 2>/dev/null || exit 1
- mv hat3.seed hat3
+ echo "collecting homologs of the input sequences." 1>>"$progressfile"
+ perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -out seekout -mod mafftash-split 2>>"seekerr"
+ seekres="$?"
fi
- while [ $cycle -gt 1 ]
- do
- if [ $distance = "parttree" ]; then
- mv pre infile
- "$prefix/splittbfast" -Z -Q $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre 2>/dev/null || exit 1
+ cat seekerr 1>>"$progressfile"
+
+ if [ $seekres -ne "0" ]; then
+ echo "Error in SEEKQUENCER" 1>>"$progressfile"
+ exit 1;
+ fi
+ echo "Done." 1>>"$progressfile"
+
+ if [ $enrichseq -eq 1 ]; then
+# cat seekout.seq >> infile
+ if [ $anysymbol -eq 1 ]; then
+ "$prefix/replaceu" $seqtype -i seekout.seq -o $nseq >> infile 2>>"$progressfile" || exit 1 # log and abort on failure, like the other replaceu calls
+ cat seekout.seq >> orig
else
- "$prefix/tbfast" -O $outnum -C $numthreads $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null 2>&1 || exit 1
+ "$prefix/replaceu" $seqtype -i seekout.seq | sed 's/_os_[0-9]*_oe_//' >> infile
fi
- cycle=`expr $cycle - 1`
- done
- if [ $iterate -gt 0 ]; then
- if [ $distance = "sixtuples" ]; then
- "$prefix/dndpre" -C $numthreads < pre > /dev/null 2>&1 || exit 1
+
+ fi
+ if [ $enrichstr -eq 1 ]; then
+ nseekstr=`wc -l < seekout.str`
+ if [ $nseekstr -gt 1 ]; then
+ cat seekout.str >> pdblist
+ pdblist="tsukaimasu"
fi
- "$prefix/dvtditr" -C $numthreads -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -h "-"$aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg < pre > /dev/null 2>&1 || exit 1
fi
- else
+ fi
+ if [ $seed != "x" ]; then
+ mv infile infile2
if [ $anysymbol -eq 1 ]; then
- mv infile orig
- "$prefix/replaceu" -i orig > infile || exit 1
+ mv orig orig2
+ cat /dev/null > orig
fi
+ cat /dev/null > infile
+ cat /dev/null > hat3.seed
+ seedoffset=0
+# echo "seednseq="$seednseq
+# echo "seedoffset="$seedoffset
+ set $seednseq >> "$progressfile"
+# echo $#
+ while [ $# -gt 1 ]
+ do
+ shift
+# echo "num="$#
- if [ $seed != "x" ]; then
- mv infile infile2
if [ $anysymbol -eq 1 ]; then
- mv orig orig2
- cat /dev/null > orig
+ cat seed$# >> orig
+ "$prefix/replaceu" $seqtype -i seed$# -o $seedoffset > clean 2>>"$progressfile" || exit 1
+ mv clean seed$#
fi
- cat /dev/null > infile
- cat /dev/null > hat3.seed
- seedoffset=0
-# echo "seednseq="$seednseq
-# echo "seedoffset="$seedoffset
- set $seednseq > /dev/null
-# echo $#
- while [ $# -gt 1 ]
- do
- shift
-# echo "num="$#
-
- if [ $anysymbol -eq 1 ]; then
- cat seed$# >> orig
- "$prefix/replaceu" -i seed$# -o $seedoffset > clean || exit 1
- mv clean seed$#
- fi
- "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile || exit 1
- cat hat3 >> hat3.seed
-# echo "$1"
- seedoffset=`expr $seedoffset + $1`
-# echo "$1"
-# echo "seedoffset="$seedoffset
- done;
+ "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile 2>>"$progressfile" || exit 1
+ cat hat3 >> hat3.seed
+# echo "$1"
+ seedoffset=`expr $seedoffset + $1`
+# echo "$1"
# echo "seedoffset="$seedoffset
- if [ $anysymbol -eq 1 ]; then
- "$prefix/replaceu" -i orig2 -o $seedoffset >> infile || exit 1 # yarinaoshi
- cat orig2 >> orig
- else
- cat infile2 >> infile
- fi
- elif [ $seedtable != "x" ]; then
- cat _seedtablefile > hat3.seed
+ done;
+# echo "seedoffset="$seedoffset
+ if [ $anysymbol -eq 1 ]; then
+ "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # yarinaoshi
+ cat orig2 >> orig
else
- cat /dev/null > hat3.seed
+ cat infile2 >> infile
fi
-# cat hat3.seed
+ elif [ $seedtable != "x" ]; then
+ cat _seedtablefile > hat3.seed
+ elif [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ mv infile infile2
+ if [ $anysymbol -eq 1 ]; then
+ mv orig orig2
+ cat /dev/null > orig
+ fi
+ cat /dev/null > infile
+
+ echo "strdir = " 1>>"$progressfile"
+ echo $strdir 1>>"$progressfile"
+
+ echo "Calling DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/)" 1>>"$progressfile"
+ perl "$prefix/mafftash_premafft.pl" -p pdblist -o ownlist -d "$strdir" 2>>"dasherr"
+ dashres="$?"
+ cat dasherr 1>>"$progressfile"
- if [ $mccaskill -eq 1 ]; then
- "$prefix/mccaskillwrap" -C $numthreads -d "$prefix" -i infile > hat4 || exit 1
- elif [ $contrafold -eq 1 ]; then
- "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 || exit 1
+ if [ $dashres -ne "0" ]; then
+ echo "Error in DASH" 1>>"$progressfile"
+ exit 1;
fi
- if [ $distance = "fasta" ]; then
- "$prefix/dndfast7" $swopt < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "blast" ]; then
- "$prefix/dndblast" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "foldalignlocal" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -h $laof -H -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "foldalignglobal" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "slara" ]; then
- "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -T -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "lara" ]; then
- "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -B -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "scarna" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -s -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "global" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -F < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "local" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -L < infile > /dev/null || exit 1
+ echo "Done." 1>>"$progressfile"
+
+ seedoffset=`grep -c '^[>|=]' instr | head -1 `
+
+ echo "# of structures = " 1>>"$progressfile"
+ echo $seedoffset 1>>"$progressfile"
+ mv hat3 hat3.seed
+
+ if [ $anysymbol -eq 1 ]; then
+ cat instr >> orig
+ "$prefix/replaceu" $seqtype -i instr -o 0 > clean 2>>"$progressfile" || exit 1
+ mv clean infile
+
+ "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # yarinaoshi
+ cat orig2 >> orig
+ else
+ cat instr > infile
+ cat infile2 >> infile
+ fi
+ else
+ cat /dev/null > hat3.seed
+ fi
+# cat hat3.seed
+
+
+
+
+ if [ $mccaskill -eq 1 ]; then
+ "$prefix/mccaskillwrap" -s -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ elif [ $dafs -eq 1 ]; then
+ "$prefix/mccaskillwrap" -G -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ elif [ $contrafold -eq 1 ]; then
+ "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ fi
+ if [ $distance = "fasta" ]; then
+ "$prefix/dndfast7" $swopt < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "blast" ]; then
+ "$prefix/dndblast" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "foldalignlocal" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -Q $spfactor -h $laof -H -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "foldalignglobal" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "slara" ]; then
+ "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -T -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "lara" ]; then
+ "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -B -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "scarna" ]; then
+# "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "dafs" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -G -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "global" ]; then
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+
+ elif [ $distance = "local" ]; then
+ if [ $fragment -ne 0 ]; then
+ "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
cat hat3.seed hat3 > hatx
mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "globalgenaf" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -O $GGOP -E $GEXP -K < infile > /dev/null || exit 1
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# do not pass addarg
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+ elif [ $distance = "globalgenaf" ]; then
+ "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -O $GGOP -E $GEXP -K $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "localgenaf" ]; then
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "last" ]; then
+ if [ $fragment -ne 0 ]; then
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
cat hat3.seed hat3 > hatx
mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "localgenaf" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -N < infile > /dev/null || exit 1
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+# do not pass addarg
cat hat3.seed hat3 > hatx
mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "distonly" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -t < infile > /dev/null || exit 1
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "parttree" ]; then
- "$prefix/splittbfast" -Q $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre || exit 1
- mv hat3.seed hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+ elif [ $distance = "lastmulti" ]; then
+ "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1
+ mv hat2 hat2i
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -r $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hat2 hat2n
+ mv hatx hat3
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ elif [ $distance = "multi" ]; then
+ "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ mv hat2 hat2i
+ "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hat2 hat2n
+ mv hatx hat3
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ elif [ $distance = "hybrid" ]; then
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
else
- "$prefix/disttbfast" -O $outnum $addarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $algopt $treealg $scoreoutarg < infile > pre || exit 1
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+# elif [ $distance = "distonly" ]; then
+# "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -t < infile > /dev/null 2>>"$progressfile" || exit 1
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "parttree" ]; then
+ "$prefix/splittbfast" $legacygapopt $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1
+ mv hat3.seed hat3
+ elif [ $distance = "ktuplesmulti" ]; then
+# "$prefix/dndpre" $model -M 1 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1
+# mv hat2 hat2i
+# "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# mv hat2 hat2n
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ else
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ "$prefix/disttbfast" -q $npickup -E $cycledisttbfast -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > pre 2>>"$progressfile" || exit 1
mv hat3.seed hat3
fi
-
- while [ $cycle -gt 1 ]
- do
- if [ $distance = "parttree" ]; then
- mv pre infile
- "$prefix/splittbfast" -Z -Q $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre || exit 1
- else
- "$prefix/tbfast" -O $outnum -C $numthreads $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null || exit 1
- fi
- cycle=`expr $cycle - 1`
- done
- if [ $iterate -gt 0 ]; then
- if [ $distance = "sixtuples" ]; then
- "$prefix/dndpre" -C $numthreads < pre > /dev/null 2>&1 || exit 1
- fi
- "$prefix/dvtditr" -C $numthreads -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -h "-"$aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg < pre > /dev/null || exit 1
+ fi
+ while [ $cycletbfast -gt 1 ] # repeats until the counter, decremented at the bottom, reaches 1
+ do
+ if [ $distance = "parttree" ]; then
+ mv pre infile # the previous output becomes the input of this pass
+ "$prefix/splittbfast" $legacygapopt -Z $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1
+ else
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum -C $numthreadstb $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1
+# if fragment>0: do nothing
+# seed: handle with care!!
+ fi
+ cycletbfast=`expr $cycletbfast - 1`
+ done
+ if [ $iterate -gt 0 ]; then
+ if [ $distance = "ktuples" ]; then
+ "$prefix/dndpre" $model -M 2 -C $numthreads < pre > /dev/null 2>>"$progressfile" || exit 1
fi
+ "$prefix/dvtditr" -W $minimumweight $bunkatsuopt -E $fixthreshold -s $unalignlevel $legacygapopt $mergearg $outnum -C $numthreadsit -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -Q $spfactor -h $aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1
fi
-
if [ $coreout -eq 1 ]; then
"$prefix/setcore" -w $corewin -i $corethr $coreext < pre > pre2
mv pre2 pre
elif [ $anysymbol -eq 1 ]; then
- "$prefix/restoreu" -a pre -i orig > restored || exit 1
+ "$prefix/restoreu" $add2ndhalfarg -a pre -i orig > restored || exit 1
mv restored pre
fi
- if [ $quiet -eq 0 ]; then
- echo '' 1>&2
- if [ $mccaskill -eq 1 ]; then
- echo "RNA base pairing probaility was calculated by the McCaskill algorithm (1)" 1>&2
- echo "implemented in Vienna RNA package (2) and MXSCARNA (3), and then" 1>&2
- echo "incorporated in the iterative alignment process (4)." 1>&2
- echo "(1) McCaskill, 1990, Biopolymers 29:1105-1119" 1>&2
- echo "(2) Hofacker et al., 2002, J. Mol. Biol. 319:3724-3732" 1>&2
- echo "(3) Tabei et al., 2008, BMC Bioinformatics 9:33" 1>&2
- echo "(4) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>&2
- echo "" 1>&2
- elif [ $contrafold -eq 1 ]; then
- echo "RNA base pairing probaility was calculated by the CONTRAfold algorithm (1)" 1>&2
- echo "and then incorporated in the iterative alignment process (4)." 1>&2
- echo "(1) Do et al., 2006, Bioinformatics 22:e90-98" 1>&2
- echo "(2) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>&2
- echo "" 1>&2
- fi
- if [ $distance = "fasta" -o $partdist = "fasta" ]; then
- echo "Pairwise alignments were computed by FASTA" 1>&2
- echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" 1>&2
- fi
- if [ $distance = "blast" ]; then
- echo "Pairwise alignments were computed by BLAST" 1>&2
- echo "(Altschul et al., 1997, NAR 25:3389-3402)" 1>&2
- fi
- if [ $distance = "scarna" ]; then
- echo "Pairwise alignments were computed by MXSCARNA" 1>&2
- echo "(Tabei et al., 2008, BMC Bioinformatics 9:33)." 1>&2
- fi
- if [ $distance = "lara" -o $distance = "slara" ]; then
- echo "Pairwise alignments were computed by LaRA" 1>&2
- echo "(Bauer et al., 2007, BMC Bioinformatics 8:271)." 1>&2
- fi
- if [ $distance = "foldalignlocal" ]; then
- echo "Pairwise alignments were computed by FOLDALIGN (local)" 1>&2
- echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>&2
- fi
- if [ $distance = "foldalignglobal" ]; then
- echo "Pairwise alignments were computed by FOLDALIGN (global)" 1>&2
- echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>&2
+
+
+
+ echo '' 1>>"$progressfile"
+ # Cite the method used to compute RNA base-pairing probabilities.
+ # The bracketed numbers in each sentence refer to the list printed below it,
+ # so the CONTRAfold branch must point at (2), not (4).
+ if [ $mccaskill -eq 1 ]; then
+   echo "RNA base pairing probability was calculated by the McCaskill algorithm (1)" 1>>"$progressfile"
+   echo "implemented in Vienna RNA package (2) and MXSCARNA (3), and then" 1>>"$progressfile"
+   echo "incorporated in the iterative alignment process (4)." 1>>"$progressfile"
+   echo "(1) McCaskill, 1990, Biopolymers 29:1105-1119" 1>>"$progressfile"
+   echo "(2) Hofacker et al., 2002, J. Mol. Biol. 319:3724-3732" 1>>"$progressfile"
+   echo "(3) Tabei et al., 2008, BMC Bioinformatics 9:33" 1>>"$progressfile"
+   echo "(4) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile"
+   echo "" 1>>"$progressfile"
+ elif [ $contrafold -eq 1 ]; then
+   echo "RNA base pairing probability was calculated by the CONTRAfold algorithm (1)" 1>>"$progressfile"
+   echo "and then incorporated in the iterative alignment process (2)." 1>>"$progressfile"
+   echo "(1) Do et al., 2006, Bioinformatics 22:e90-98" 1>>"$progressfile"
+   echo "(2) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile"
+   echo "" 1>>"$progressfile"
+ fi
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ echo "Input structures are decomposed into structural domains using" 1>>"$progressfile"
+ echo "Protein Domain Parser (Alexandrov & Shindyalov 2003)." 1>>"$progressfile"
+ echo "Domain pairs are aligned using the rash function in" 1>>"$progressfile"
+ echo "the ASH structural alignment package (Standley et al. 2007)." 1>>"$progressfile"
+ fi
+ if [ $pdblist != "/dev/null" ]; then
+ echo "Pre-computed alignments stored in " 1>>"$progressfile"
+ echo "DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/) are used. " 1>>"$progressfile"
+ fi
+ if [ $distance = "fasta" -o $partdist = "fasta" ]; then
+ echo "Pairwise alignments were computed by FASTA" 1>>"$progressfile"
+ echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" 1>>"$progressfile"
+ fi
+ if [ $distance = "blast" ]; then
+ echo "Pairwise alignments were computed by BLAST" 1>>"$progressfile"
+ echo "(Altschul et al., 1997, NAR 25:3389-3402)" 1>>"$progressfile"
+ fi
+ if [ $distance = "last" -o $distance = "lastmulti" ]; then
+ echo "Pairwise alignments were computed by LAST" 1>>"$progressfile"
+ echo "http://last.cbrc.jp/" 1>>"$progressfile"
+ echo "Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487" 1>>"$progressfile"
+ fi
+ if [ $distance = "scarna" ]; then
+ echo "Pairwise alignments were computed by MXSCARNA" 1>>"$progressfile"
+ echo "(Tabei et al., 2008, BMC Bioinformatics 9:33)." 1>>"$progressfile"
+ fi
+ # Credit DAFS when it produced the pairwise alignments; the reference used
+ # to be an unfinished placeholder ("2012,,,,").
+ if [ $distance = "dafs" ]; then
+   echo "Pairwise alignments were computed by DAFS" 1>>"$progressfile"
+   echo "(Sato et al., 2012, Bioinformatics 28:3218-3224)." 1>>"$progressfile"
+ fi
+ if [ $distance = "lara" -o $distance = "slara" ]; then
+ echo "Pairwise alignments were computed by LaRA" 1>>"$progressfile"
+ echo "(Bauer et al., 2007, BMC Bioinformatics 8:271)." 1>>"$progressfile"
+ fi
+ if [ $distance = "foldalignlocal" ]; then
+ echo "Pairwise alignments were computed by FOLDALIGN (local)" 1>>"$progressfile"
+ echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile"
+ fi
+ if [ $distance = "foldalignglobal" ]; then
+ echo "Pairwise alignments were computed by FOLDALIGN (global)" 1>>"$progressfile"
+ echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile"
+ fi
+# printf "\n" 1>>"$progressfile"
+ echo 'Strategy:' 1>>"$progressfile"
+ printf ' '$strategy 1>>"$progressfile"
+ echo ' ('$performance')' 1>>"$progressfile"
+ echo ' '$explanation 1>>"$progressfile"
+ echo '' 1>>"$progressfile"
+ echo "If unsure which option to use, try 'mafft --auto input > output'." 1>>"$progressfile"
+ echo "For more information, see 'mafft --help', 'mafft --man' and the mafft page." 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ echo "The default gap scoring scheme has been changed in version 7.110 (2013 Oct)." 1>>"$progressfile"
+ echo "It tends to insert more gaps into gap-rich regions than previous versions." 1>>"$progressfile"
+ echo "To disable this change, add the --leavegappyregion option." 1>>"$progressfile"
+# echo "If long gaps are expected, try 'mafft --ep 0.0 --auto input > output'." 1>>"$progressfile"
+# echo "If the possibility of long gaps can be excluded, add '--ep 0.123'." 1>>"$progressfile"
+ if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
+ echo "" 1>>"$progressfile"
+ if [ $oldgenafparam -eq 1 ]; then
+ echo "Obsolete parameters used for this calculation." 1>>"$progressfile"
+ echo "Also try the new parameters for E-INS-i, by not specifying --oldgenafpair." 1>>"$progressfile"
+ else
+ echo "Parameters for the E-INS-i option have been changed in version 7.243 (2015 Jun)." 1>>"$progressfile"
+ echo "To switch to the old parameters, use --oldgenafpair, instead of --genafpair." 1>>"$progressfile"
fi
- printf "\n" 1>&2
- echo 'Strategy:' 1>&2
- printf ' '$strategy 1>&2
- echo ' ('$performance')' 1>&2
- echo ' '$explanation 1>&2
- echo '' 1>&2
- echo "If unsure which option to use, try 'mafft --auto input > output'." 1>&2
-# echo "If long gaps are expected, try 'mafft --ep 0.0 --auto input > output'." 1>&2
- echo "If the possibility of long gaps can be excluded, add '--ep 0.123'." 1>&2
- echo "For more information, see 'mafft --help', 'mafft --man' and the mafft page." 1>&2
- echo '' 1>&2
fi
- )
+ echo '' 1>>"$progressfile"
+
+
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ cat dasherr >>"$progressfile"
+ fi
+
+# ) # 2017/Mar/17
+ popd > /dev/null;
+
if [ "$outputfile" = "" ]; then
if [ "$outputopt" = "null" ]; then
cat < $TMPFILE/pre || exit 1
else
- "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre || exit 1
+ "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre 2>>/dev/null || exit 1
fi
else
if [ "$outputopt" = "null" ]; then
cat < $TMPFILE/pre > "$outputfile" || exit 1
else
- "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre > "$outputfile" || exit 1
+ "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre > "$outputfile" 2>>/dev/null || exit 1
fi
fi
cp $TMPFILE/infile.tree "$infilename.tree"
fi
+ if [ -s $TMPFILE/GuideTree ]; then # only when --merge is used
+ cp $TMPFILE/GuideTree .
+ fi
+
if [ $distout -eq 1 ]; then
cp $TMPFILE/hat2 "$infilename.hat2"
fi
+ if [ $npickup -ne 0 ]; then
+ cp $TMPFILE/notused "$infilename.notused"
+ fi
+
+ if [ -s $TMPFILE/_deletemap ]; then
+ if [ "$mapoutfile" = "/dev/null" ]; then
+ cp $TMPFILE/_deletemap "$addfile.map"
+ else
+ cp $TMPFILE/_deletemap "$mapoutfile"
+ fi
+ fi
+
exit 0;
fi
printf( "\n" ) > "/dev/tty";
printf( " MAFFT %s\n", version ) > "/dev/tty";
printf( "\n" ) > "/dev/tty";
- printf( " Copyright (c) 2011 Kazutaka Katoh\n" ) > "/dev/tty";
- printf( " NAR 30:3059-3066, NAR 33:511-518\n" ) > "/dev/tty";
- printf( " http://mafft.cbrc.jp/alignment/software/\n" ) > "/dev/tty";
+ printf( " Copyright (c) 2016 Kazutaka Katoh\n" ) > "/dev/tty";
+ printf( " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)\n" ) > "/dev/tty";
+ printf( " http://mafft.cbrc.jp/alignment/software/\n" ) > "/dev/tty";
printf( "---------------------------------------------------------------------\n" ) > "/dev/tty";
printf( "\n" ) > "/dev/tty";
while( 1 )
{
printf( "\n" ) > "/dev/tty";
- printf( "Additional arguments? (--ep #, --op #, --kappa #, etc)\n" ) > "/dev/tty";
+ printf( "Additional arguments? (--ep # --op # --kappa # etc)\n" ) > "/dev/tty";
printf( "@ " ) > "/dev/tty";
res = getline < "/dev/tty";
close( "/dev/tty" );
printf( "\n" ) > "/dev/tty";
}
system( command );
- command = sprintf( "less \"%s\"", outfile );
+ command = sprintf( "more \"%s\"", outfile );
system( command );
printf( "Press Enter to exit." ) > "/dev/tty";
res = getline < "/dev/tty";
--- /dev/null
+#!/usr/bin/perl
+
+#####################################################################
+# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
+#
+# Ver. Date Changelog
+#####################################################################
+# 1.0 07.26.13 Initial release
+# 2.0 09.03.13 Added extensive warnings and error messages
+# 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs
+# 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output
+# 3.2 12.08.14 Removed 5-char restriction for own structure files
+#
+#####################################################################
+
+use strict;
+use Getopt::Long;
+use File::Path qw(make_path remove_tree);
+use LWP::Simple;
+use LWP::UserAgent;
+
+# to prevent error 'Header line too long (limit is 8192)' [v3.1]
+use LWP::Protocol::http;
+push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
+
+
+
+my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft"; # REST endpoint: the job is POSTed here and the polling/download URLs are built from it
+
+my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE ); # filled in by GetOptions below
+
+GetOptions
+(
+ 'd=s' => \$WORKDIR, # -d: directory that holds the <id>.pdb files named in the -o list
+ 'p=s' => \$PDBLIST, # -p: file listing PDB entries, one five-character id per line
+ 'o=s' => \$OWNLIST, # -o: file listing the ids of the user's own structure files
+ 'h=s' => \$HAT3FILE, # -h: where the hat3 result is moved to (defaults to "hat3")
+ 'i=s' => \$INSTRFILE, # -i: where the instr result is moved to (defaults to "instr")
+);
+
+print STDERR "[MAFFTash-premafft]\n";
+
+# set temp directory
+# The directory is created atomically with an unpredictable suffix and
+# owner-only permissions. A fixed "/tmp/mapremafft<pid>" name could be
+# pre-created by another local user and would then have been reused silently
+# for the downloads that are later unpacked with tar.
+# tempdir() dies with its own message if the directory cannot be created.
+use File::Temp ();
+my $TMP = File::Temp::tempdir( sprintf("mapremafft%d.XXXXXX", $$), DIR => "/tmp" );
+
+
+
+######
+# validation: at least one input list is required, and -o additionally needs -d
+&help("Required parameter : atleast one of either '-p' or '-o'") unless ( defined $PDBLIST || defined $OWNLIST); # help() is defined outside this section; presumably prints usage and exits - confirm
+&help("Required parameter : '-d'") if defined $OWNLIST && ! defined $WORKDIR;
+
+$HAT3FILE = "hat3" unless defined $HAT3FILE; # default output file names
+$INSTRFILE = "instr" unless defined $INSTRFILE;
+chop $WORKDIR if defined $WORKDIR && $WORKDIR =~ m/\/$/g; # drop one trailing slash; paths are later built as "$WORKDIR/<id>.pdb"
+
+
+######
+# prepare inputs: @files collects the form fields that are sent with the POST request
+print STDERR "Preparing inputs for service request...\n";
+
+my @files = (); # flat list of field-name => value pairs
+push(@files, "strweight" => "0.5"); # NOTE(review): fixed request parameters; their meaning is defined by the server - confirm
+push(@files, "premafft" => "1");
+
+
+# pdb entries
+# Copy every well-formed entry of the -p list into a temporary request file
+# and register that file as the "inputfile" upload field.
+if ( defined $PDBLIST )
+{
+    print STDERR "PDB List defined!\n";
+    &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;
+    my $listfile = "$TMP/pdblist.inp";
+
+    # Three-argument open with lexical handles: the input name comes from the
+    # command line and must never be parsed for mode characters.
+    open(my $pdb_in, '<', $PDBLIST) or &bail("Error: Cannot open file $PDBLIST for reading!");
+    open(my $pdb_out, '>', $listfile) or &bail("Error: Cannot open temporary file $listfile for writing!");
+
+    while ( my $line = <$pdb_in> )
+    {
+        chomp $line;
+        $line =~ s/\r$//;    # tolerate CRLF lists, which used to match nothing
+
+        # only lines that are exactly one five-character id are forwarded
+        print {$pdb_out} ">PDBID\n$1\n" if $line =~ /^(\w{5})$/;
+    }
+
+    # buffered write errors only surface when the handle is closed
+    close $pdb_out or &bail("Error: Cannot write temporary file $listfile!");
+    close $pdb_in;
+
+    push(@files, "inputfile" => ["$listfile"]);
+}
+
+
+
+# upload own structures
+# Each id in the -o list must have a matching "<id>.pdb" under -d. The file is
+# registered as an "inputownfile[]" upload and the id is remembered in
+# %ownids.
+my %ownids = ();
+
+if ( defined $OWNLIST )
+{
+    print STDERR "OWN List defined!\n";
+    &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;
+
+    # Three-argument open with a lexical handle: the name comes from the
+    # command line and must never be parsed for mode characters.
+    open(my $own_in, '<', $OWNLIST) or &bail("Error: Cannot open file $OWNLIST for reading!");
+
+    while ( my $line = <$own_in> )
+    {
+        chomp $line;
+        $line =~ s/\r$//;    # tolerate CRLF lists, which used to match nothing
+
+        # lines that are empty or contain whitespace are ignored
+        next unless $line =~ /^(\S+)$/;
+        my $ownid   = $1;
+        my $fileref = "$WORKDIR/$ownid.pdb";
+
+        unless ( -e $fileref )
+        {
+            close $own_in;
+            &bail("Error: File $fileref does not exists!");
+        }
+
+        push(@files, "inputownfile[]" => ["$fileref"]);
+        $ownids{$ownid} = 1;
+    }
+
+    close $own_in;
+}
+
+
+
+######
+# start rest service: submit the collected form fields and remember the job id
+print STDERR "Sending service request...\n";
+
+my $browser = LWP::UserAgent->new;
+$browser->timeout(0); # NOTE(review): presumably meant to disable the request timeout - confirm against LWP::UserAgent
+
+
+# post: running a mafftash job (multipart form built from @files)
+my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' );
+&bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success);
+
+
+# get response from post request: a status string and the job id used in all later URLs
+my ($status, $mafftashid) = &parseResponse($postResponse->content);
+
+
+
+my $MAXTRIES = 3; # download attempts per file before bail is called
+my $STIMER = 4; # seconds: pause between download retries and step of the polling delay
+my $longtimer = 0; # current polling delay in seconds
+
+print STDERR "Request sent! Waiting for response...[$mafftashid]\n";
+
+
+# wait for results until it becomes available: poll the job, then download the checksum list and every file it names
+while(1)
+{
+ $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER; # delay grows by $STIMER per poll; once it has exceeded 3*$STIMER it restarts at $STIMER
+ sleep $longtimer;
+
+
+ # get: get results for mafftash job
+ my $getResponse = $browser->get("$BASEURL/$mafftashid");
+
+ if ( $getResponse->is_success )
+ {
+
+ # get response from get request
+ ($status, $mafftashid) = &parseResponse($getResponse->content);
+ next unless ( $status eq "done" ); # any other status: poll again
+
+
+ # if job is finished and ready
+ print STDERR "Results found!\n";
+ my $csfile = "$TMP/checksum.tar.gz";
+ my $try1 = 1; # attempt counter for the checksum list
+
+
+ while(1)
+ {
+ print STDERR "Fetching Results... [Trial $try1]\n";
+
+ if ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile ) # download must succeed and leave a non-empty file
+ {
+ # get response from get request: hash ref mapping file name => expected checksum
+ my $checklist = &extractchecksum($csfile);
+ &bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 );
+
+
+ foreach my $id ( keys %$checklist )
+ {
+ my $checkfile = "$TMP/$id";
+ my $checkid = $checklist->{$id};
+ my $try2 = 1; # attempt counter for this file
+
+ while(1)
+ {
+ unlink $checkfile if -e $checkfile; # never keep a partial download from an earlier attempt
+
+ if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
+ {
+ my $hashid = &getchecksum($checkfile);
+ #print STDERR "[hashid]$hashid [checkid]$checkid\n";
+
+ if ($hashid ne "" && $hashid ne $checkid ) # an empty local checksum is accepted without comparison
+ {
+ unlink $checkfile if -e $checkfile;
+ &bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ else
+ {
+ last; # file accepted, continue with the next id
+ }
+ }
+ else
+ {
+ &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ }
+ }
+
+ last; # every listed file was fetched: stop retrying the list
+ }
+ else
+ {
+ &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
+ $try1++;
+ sleep $STIMER;
+ }
+ }
+
+ last; # leave the polling loop
+
+ }
+ else
+ {
+ &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content))); # an unsuccessful poll is handed to bail; there is no retry counter on this path
+ }
+
+}
+
+
+# make sure outputs were generated
+# decompress
+print STDERR "Assembling final results...\n";
+
+# the downloaded archive parts are concatenated and unpacked inside $TMP;
+# $TMP is generated by this script, so passing it through the shell is safe
+&backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");
+
+# the output names are user supplied: move the files without a shell so that
+# spaces or shell metacharacters in -i / -h cannot break or alter the command
+require File::Copy;
+File::Copy::move("$TMP/instr", $INSTRFILE) if -e "$TMP/instr";
+File::Copy::move("$TMP/hat3", $HAT3FILE) if -e "$TMP/hat3";
+
+# sometimes no hat3 file is generated [v3.1]
+#&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;
+&bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;
+
+
+# warn if some ownids were omitted
+# Every header line ">NUMBER_ID" of the instr file contributes ID; own
+# structure IDs that never show up there are reported on STDERR.
+if ( scalar keys(%ownids) > 0 )
+{
+    my %instrids = ();
+
+    # three-argument open: $INSTRFILE is a user-supplied path
+    open(my $instrfh, '<', $INSTRFILE) or &bail("Error: Cannot open file $INSTRFILE for reading!");
+
+    while ( my $line = <$instrfh> )
+    {
+        chomp $line;
+        $instrids{$1} = 1 if $line =~ /^>\d+_(\S+)$/;
+    }
+
+    close $instrfh;
+
+    foreach my $id ( keys %ownids )
+    {
+        warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id};
+    }
+
+}
+
+
+
+&cleanup();
+
+
+
+####################
+####################
+
+
+
+# Split a service reply of the form "<mafftashid>:<status>" into its parts.
+# Returns the list (status, mafftashid). Both are empty strings unless the
+# reply consists of exactly two colon-separated fields free of whitespace
+# (a single trailing newline is tolerated).
+sub parseResponse
+{
+    my ($reply) = @_;
+
+    return ($2, $1) if $reply =~ /^([^\s:]+):([^\s:]+)$/;
+    return ("", "");
+}
+
+
+# Read the checksum list stored in the tar.gz archive $infile.
+# The archive content is streamed from "tar -zxf FILE -O"; every line of the
+# form "<checksum> <name>" becomes one entry name => checksum.
+# Returns a hash reference, which is empty when tar cannot be started or no
+# line matches.
+sub extractchecksum
+{
+    my $infile = shift;
+    my %dataset = ();
+
+    # list-form pipe open: tar is started directly, so the file name is never
+    # parsed by a shell
+    open(my $csum, '-|', 'tar', '-zxf', $infile, '-O') or return \%dataset;
+
+    while ( my $line = <$csum> )
+    {
+        chomp $line;
+        $dataset{$2} = $1 if $line =~ /^(\S+)\s+(\S+)$/;
+    }
+
+    close $csum;
+
+    return \%dataset;
+
+}
+
+
+# Extract the message from a reply containing "error":"<message>".
+# Returns the text between the quotes, or an empty string when the reply
+# holds no such (non-empty) entry.
+sub parseError
+{
+    my ($reply) = @_;
+
+    if ( $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
+    {
+        return $1;
+    }
+
+    return "";
+}
+
+
+# Compute the MD5 checksum of $infile with an external tool.
+# /usr/bin/md5sum is preferred, /sbin/md5 -q is the fallback. Returns the
+# checksum string, or "" when neither tool is executable, the tool cannot be
+# started, or its output has an unexpected shape (callers treat "" as
+# "checksum unavailable").
+sub getchecksum
+{
+    my $infile = shift;
+
+    # md5 binary check
+    my @md5cmd = ();
+
+    if ( -x "/usr/bin/md5sum" )
+    {
+        @md5cmd = ("/usr/bin/md5sum");
+    }
+    elsif ( -x "/sbin/md5" )
+    {
+        @md5cmd = ("/sbin/md5", "-q");
+    }
+
+    return "" unless @md5cmd;
+
+
+    # list-form pipe open: the file name (derived from a server-supplied list)
+    # is passed as a plain argument and never parsed by a shell
+    my $checksum = "";
+    open(my $md5fh, '-|', @md5cmd, $infile) or return "";
+
+    while ( my $line = <$md5fh> )
+    {
+        # "<checksum> <file>" (md5sum) or "<checksum>" alone (md5 -q)
+        if ( $line =~ /^(\S+)\s+(\S+)$/ || $line =~ /^(\S+)$/ )
+        {
+            $checksum = $1;
+            last;
+        }
+    }
+
+    close $md5fh;
+
+    return $checksum;
+
+}
+
+
+# Run $command through the shell and discard its output.
+# Returns 1 when the command ran and exited with status 0, otherwise 0
+# (could not be started, non-zero exit status, or killed by a signal).
+sub backticks
+{
+    my $command = shift;
+
+    `$command`;
+
+    # $? is -1 when the command could not be started and non-zero for any
+    # failing exit status or signal; only 0 means success
+    return ($? == 0) ? 1 : 0;
+}
+
+
+# Abort the script: print $str (when given) to STDERR, remove the temporary
+# directory through cleanup() and exit with status 1.
+sub bail
+{
+    my ($message) = @_;
+
+    if ( defined $message )
+    {
+        print STDERR "$message\n";
+    }
+
+    &cleanup();
+    exit(1);
+}
+
+
+# Remove the temporary working directory $TMP together with its content.
+# Does nothing when $TMP is empty or is not an existing directory.
+sub cleanup
+{
+    return if ($TMP eq "" || !-d $TMP);
+
+    # remove_tree deletes the directory and everything inside it, so no
+    # separate (and unchecked) opendir/unlink pass is needed
+    remove_tree($TMP);
+
+}
+
+
+# Print the usage text to STDOUT, then hand $str to bail(), which prints it to
+# STDERR (when defined), cleans up and exits with status 1.
+sub help
+{
+ my $str = shift;
+
+ # single-quoted heredoc: the text is printed verbatim, nothing is interpolated
+ print <<'HELPME';
+
+USAGE
+ ./mafftash_premafft.pl -p [FILE]
+ ./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]
+ ./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]
+
+
+PARAMETERS
+ -p [FILE]
+ FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format
+
+ -o [FILE] -d [DIRECTORY]
+ FILE contains a list of IDs from your own structure/pdb files (one entry per line)
+ for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY
+
+ -h [HATFILE]
+ save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory
+
+ -i [INSTRFILE]
+ save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory
+
+HELPME
+
+ &bail($str);
+}
+
+
+
--- /dev/null
+#!/usr/bin/perl
+
+#####################################################################
+# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
+#
+# Ver. Date Changelog
+#####################################################################
+# 1.0 07.26.13 Initial release
+# 2.0 09.03.13 Added extensive warnings and error messages
+# 3.0 10.28.13 Fix for retrieving large files. Added STDERR logs
+# 3.1 11.08.13 Added LWP failsafe. Made hat3 not a required output
+# 3.2 12.08.14 Removed 5-char restriction for own structure files
+#
+#####################################################################
+
+use strict;
+use Getopt::Long;
+use File::Path qw(make_path remove_tree);
+use LWP::Simple;
+use LWP::UserAgent;
+
+# to prevent error 'Header line too long (limit is 8192)' [v3.1]
+use LWP::Protocol::http;
+push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
+
+
+
+# endpoint of the MAFFTash REST service; all requests below are built from it
+my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/MAFFTash/REST/service.cgi/premafft";
+
+# command line values: -d, -p, -o, -h and -i respectively
+my ( $WORKDIR, $PDBLIST, $OWNLIST, $HAT3FILE, $INSTRFILE );
+
+# every option takes a string argument
+# NOTE(review): the return value of GetOptions is not checked, so unknown
+# options do not stop the script here
+GetOptions
+(
+ 'd=s' => \$WORKDIR,
+ 'p=s' => \$PDBLIST,
+ 'o=s' => \$OWNLIST,
+ 'h=s' => \$HAT3FILE,
+ 'i=s' => \$INSTRFILE,
+);
+
+print STDERR "[MAFFTash-premafft]\n";
+
+# set temp directory
+# tempdir() creates a new, uniquely named directory that only the current user
+# can access; a fixed "/tmp/mapremafft<pid>" name is predictable and was reused
+# whenever a directory of that name already existed
+require File::Temp;
+my $TMP = File::Temp::tempdir( "mapremafftXXXXXX", DIR => "/tmp" );
+
+
+
+######
+# validation
+# at least one input list is required; own structures additionally need -d
+if ( !defined $PDBLIST && !defined $OWNLIST )
+{
+    &help("Required parameter : atleast one of either '-p' or '-o'");
+}
+if ( defined $OWNLIST && !defined $WORKDIR )
+{
+    &help("Required parameter : '-d'");
+}
+
+# default output names
+if ( !defined $HAT3FILE )
+{
+    $HAT3FILE = "hat3";
+}
+if ( !defined $INSTRFILE )
+{
+    $INSTRFILE = "instr";
+}
+
+# drop the last character of the working directory when it ends in a slash
+if ( defined $WORKDIR && $WORKDIR =~ m/\/$/g )
+{
+    chop $WORKDIR;
+}
+
+
+######
+# prepare inputs
+print STDERR "Preparing inputs for service request...\n";
+
+# form fields of the POST request, kept as a flat key => value list
+my @files = ();
+push(@files, "strweight" => "0.5");
+push(@files, "premafft" => "1");
+
+
+# pdb entries
+# Every line of $PDBLIST made of exactly five word characters is written to a
+# temporary list file as a ">PDBID" record; all other lines are dropped.
+if ( defined $PDBLIST )
+{
+    print STDERR "PDB List defined!\n";
+    &bail("Error: Input file $PDBLIST does not exists!") unless -e $PDBLIST;
+    my $listfile = "$TMP/pdblist.inp";
+
+    # three-argument opens with lexical handles: the user-supplied name can
+    # no longer be interpreted as an open mode or a pipe
+    open(my $inpfh, '<', $PDBLIST) or &bail("Error: Cannot open file $PDBLIST for reading!");
+    open(my $outfh, '>', $listfile) or &bail("Error: Cannot open temporary file $listfile for writing!");
+
+    while ( my $line = <$inpfh> )
+    {
+        chomp $line;
+        print {$outfh} ">PDBID\n$1\n" if $line =~ /^(\w{5})$/;
+    }
+
+    close $outfh;
+    close $inpfh;
+
+    # the list file is attached to the request as a file-upload field
+    push(@files, "inputfile" => [$listfile]);
+}
+
+
+
+# upload own structures
+# %ownids records every ID listed in $OWNLIST so that IDs missing from the
+# final instr file can be reported once the job has finished.
+my %ownids = ();
+
+if ( defined $OWNLIST )
+{
+    print STDERR "OWN List defined!\n";
+    &bail("Error: Input file $OWNLIST does not exists!") unless -e $OWNLIST;
+
+    # three-argument open with a lexical handle: the user-supplied name can
+    # no longer be interpreted as an open mode or a pipe
+    open(my $ownfh, '<', $OWNLIST) or &bail("Error: Cannot open file $OWNLIST for reading!");
+
+    while ( my $line = <$ownfh> )
+    {
+        chomp $line;
+
+        # one ID per line; empty lines and lines containing whitespace are skipped
+        next unless $line =~ /^(\S+)$/;
+        my $ownid = $1;
+        my $fileref = "$WORKDIR/$ownid.pdb";
+
+        unless ( -e $fileref )
+        {
+            close $ownfh;
+            &bail("Error: File $fileref does not exists!");
+        }
+
+        # the structure file is attached to the request as a file-upload field
+        push(@files, "inputownfile[]" => [$fileref]);
+        $ownids{$ownid} = 1;
+    }
+
+    close $ownfh;
+}
+
+
+
+######
+# start rest service
+print STDERR "Sending service request...\n";
+
+my $browser = LWP::UserAgent->new;
+# NOTE(review): timeout(0) is presumably meant to disable the inactivity
+# timeout for long-running jobs - confirm against the LWP::UserAgent docs
+$browser->timeout(0);
+
+
+# post: running a mafftash job
+# all collected fields and files in @files are sent as one form-data request;
+# on an unsuccessful response the HTTP code and the server's "error" text are
+# reported and the script exits through bail()
+my $postResponse = $browser->post( $BASEURL, \@files, 'Content_Type' => 'form-data' );
+&bail(sprintf("[%d] %s\n", $postResponse->code, &parseError($postResponse->content))) unless($postResponse->is_success);
+
+
+# get response from post request
+my ($status, $mafftashid) = &parseResponse($postResponse->content);
+
+# parseResponse returns empty strings for a reply it cannot parse; without a
+# job id every later request would go to "$BASEURL/", so stop right here
+&bail("Error: Unexpected response from server! [No job id]") if $mafftashid eq "";
+
+
+# retry limit for each download, base delay in seconds, current polling delay
+my $MAXTRIES = 3;
+my $STIMER = 4;
+my $longtimer = 0;
+
+print STDERR "Request sent! Waiting for response...[$mafftashid]\n";
+
+
+# wait for results until it becomes available
+# Outline of the loop: poll the job status, and once it is "done" download the
+# checksum list, then every file named in that list, verifying each download.
+while(1)
+{
+ # the polling delay grows by $STIMER per pass (4, 8, 12, 16 seconds with the
+ # values set above) and then wraps around to $STIMER again
+ $longtimer = $longtimer <= ($STIMER*3) ? $longtimer+$STIMER : $STIMER;
+ sleep $longtimer;
+
+
+ # get: get results for mafftash job
+ my $getResponse = $browser->get("$BASEURL/$mafftashid");
+
+ if ( $getResponse->is_success )
+ {
+
+ # get response from get request
+ # NOTE(review): an unparsable reply resets $mafftashid to "" and every
+ # status other than "done" simply polls again; the number of polls is
+ # not bounded
+ ($status, $mafftashid) = &parseResponse($getResponse->content);
+ next unless ( $status eq "done" );
+
+
+ # if job is finished and ready
+ print STDERR "Results found!\n";
+ my $csfile = "$TMP/checksum.tar.gz";
+ my $try1 = 1;
+
+
+ # step 1: fetch the checksum list, at most $MAXTRIES attempts
+ while(1)
+ {
+ print STDERR "Fetching Results... [Trial $try1]\n";
+
+ if ( is_success(getstore("$BASEURL/getmdlist/$mafftashid", $csfile)) && -e $csfile && -s $csfile )
+ {
+ # get response from get request
+ # $checklist maps each file name to its expected checksum
+ my $checklist = &extractchecksum($csfile);
+ &bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 );
+
+
+ # step 2: fetch every listed file, at most $MAXTRIES attempts each
+ # NOTE(review): $id comes from the server's list and is used
+ # unchanged in the local path "$TMP/$id" - confirm it is trusted
+ foreach my $id ( keys %$checklist )
+ {
+ my $checkfile = "$TMP/$id";
+ my $checkid = $checklist->{$id};
+ my $try2 = 1;
+
+ while(1)
+ {
+ # start every attempt without a stale partial download
+ unlink $checkfile if -e $checkfile;
+
+ if ( is_success(getstore("$BASEURL/get/$mafftashid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
+ {
+ my $hashid = &getchecksum($checkfile);
+ #print STDERR "[hashid]$hashid [checkid]$checkid\n";
+
+ # an empty $hashid (getchecksum could not compute one)
+ # accepts the file without verification
+ if ($hashid ne "" && $hashid ne $checkid )
+ {
+ unlink $checkfile if -e $checkfile;
+ &bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ else
+ {
+ last;
+ }
+ }
+ else
+ {
+ &bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ }
+ }
+
+ # every listed file was downloaded: leave the checksum-list loop
+ last;
+ }
+ else
+ {
+ &bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
+ $try1++;
+ sleep $STIMER;
+ }
+ }
+
+ # results are complete: leave the polling loop
+ last;
+
+ }
+ else
+ {
+ # a failed status request ends the script with the HTTP code and the
+ # server's "error" text
+ &bail(sprintf("[%d] %s\n", $getResponse->code, &parseError($getResponse->content)));
+ }
+
+}
+
+
+# make sure outputs were generated
+# decompress
+print STDERR "Assembling final results...\n";
+
+# the downloaded archive parts are concatenated and unpacked inside $TMP;
+# $TMP is generated by this script, so passing it through the shell is safe
+&backticks("cat $TMP/archive.tar.gz* | tar -zxf - -C $TMP/");
+
+# the output names are user supplied: move the files without a shell so that
+# spaces or shell metacharacters in -i / -h cannot break or alter the command
+require File::Copy;
+File::Copy::move("$TMP/instr", $INSTRFILE) if -e "$TMP/instr";
+File::Copy::move("$TMP/hat3", $HAT3FILE) if -e "$TMP/hat3";
+
+# sometimes no hat3 file is generated [v3.1]
+#&bail("Error: Output file $HAT3FILE not found!") unless -e $HAT3FILE;
+&bail("Error: Output file $INSTRFILE not found!") unless -e $INSTRFILE;
+
+
+# warn if some ownids were omitted
+# Every header line ">NUMBER_ID" of the instr file contributes ID; own
+# structure IDs that never show up there are reported on STDERR.
+if ( scalar keys(%ownids) > 0 )
+{
+    my %instrids = ();
+
+    # three-argument open: $INSTRFILE is a user-supplied path
+    open(my $instrfh, '<', $INSTRFILE) or &bail("Error: Cannot open file $INSTRFILE for reading!");
+
+    while ( my $line = <$instrfh> )
+    {
+        chomp $line;
+        $instrids{$1} = 1 if $line =~ /^>\d+_(\S+)$/;
+    }
+
+    close $instrfh;
+
+    foreach my $id ( keys %ownids )
+    {
+        warn "Warning: Own structure $id was excluded from instr/hat3.\n" unless $instrids{$id};
+    }
+
+}
+
+
+
+&cleanup();
+
+
+
+####################
+####################
+
+
+
+# Split a service reply of the form "<mafftashid>:<status>" into its parts.
+# Returns the list (status, mafftashid). Both are empty strings unless the
+# reply consists of exactly two colon-separated fields free of whitespace
+# (a single trailing newline is tolerated).
+sub parseResponse
+{
+    my ($reply) = @_;
+
+    return ($2, $1) if $reply =~ /^([^\s:]+):([^\s:]+)$/;
+    return ("", "");
+}
+
+
+# Read the checksum list stored in the tar.gz archive $infile.
+# The archive content is streamed from "tar -zxf FILE -O"; every line of the
+# form "<checksum> <name>" becomes one entry name => checksum.
+# Returns a hash reference, which is empty when tar cannot be started or no
+# line matches.
+sub extractchecksum
+{
+    my $infile = shift;
+    my %dataset = ();
+
+    # list-form pipe open: tar is started directly, so the file name is never
+    # parsed by a shell
+    open(my $csum, '-|', 'tar', '-zxf', $infile, '-O') or return \%dataset;
+
+    while ( my $line = <$csum> )
+    {
+        chomp $line;
+        $dataset{$2} = $1 if $line =~ /^(\S+)\s+(\S+)$/;
+    }
+
+    close $csum;
+
+    return \%dataset;
+
+}
+
+
+# Extract the message from a reply containing "error":"<message>".
+# Returns the text between the quotes, or an empty string when the reply
+# holds no such (non-empty) entry.
+sub parseError
+{
+    my ($reply) = @_;
+
+    if ( $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
+    {
+        return $1;
+    }
+
+    return "";
+}
+
+
+# Compute the MD5 checksum of $infile with an external tool.
+# /usr/bin/md5sum is preferred, /sbin/md5 -q is the fallback. Returns the
+# checksum string, or "" when neither tool is executable, the tool cannot be
+# started, or its output has an unexpected shape (callers treat "" as
+# "checksum unavailable").
+sub getchecksum
+{
+    my $infile = shift;
+
+    # md5 binary check
+    my @md5cmd = ();
+
+    if ( -x "/usr/bin/md5sum" )
+    {
+        @md5cmd = ("/usr/bin/md5sum");
+    }
+    elsif ( -x "/sbin/md5" )
+    {
+        @md5cmd = ("/sbin/md5", "-q");
+    }
+
+    return "" unless @md5cmd;
+
+
+    # list-form pipe open: the file name (derived from a server-supplied list)
+    # is passed as a plain argument and never parsed by a shell
+    my $checksum = "";
+    open(my $md5fh, '-|', @md5cmd, $infile) or return "";
+
+    while ( my $line = <$md5fh> )
+    {
+        # "<checksum> <file>" (md5sum) or "<checksum>" alone (md5 -q)
+        if ( $line =~ /^(\S+)\s+(\S+)$/ || $line =~ /^(\S+)$/ )
+        {
+            $checksum = $1;
+            last;
+        }
+    }
+
+    close $md5fh;
+
+    return $checksum;
+
+}
+
+
+# Run $command through the shell and discard its output.
+# Returns 1 when the command ran and exited with status 0, otherwise 0
+# (could not be started, non-zero exit status, or killed by a signal).
+sub backticks
+{
+    my $command = shift;
+
+    `$command`;
+
+    # $? is -1 when the command could not be started and non-zero for any
+    # failing exit status or signal; only 0 means success
+    return ($? == 0) ? 1 : 0;
+}
+
+
+# Abort the script: print $str (when given) to STDERR, remove the temporary
+# directory through cleanup() and exit with status 1.
+sub bail
+{
+    my ($message) = @_;
+
+    if ( defined $message )
+    {
+        print STDERR "$message\n";
+    }
+
+    &cleanup();
+    exit(1);
+}
+
+
+# Remove the temporary working directory $TMP together with its content.
+# Does nothing when $TMP is empty or is not an existing directory.
+sub cleanup
+{
+    return if ($TMP eq "" || !-d $TMP);
+
+    # remove_tree deletes the directory and everything inside it, so no
+    # separate (and unchecked) opendir/unlink pass is needed
+    remove_tree($TMP);
+
+}
+
+
+# Print the usage text to STDOUT, then hand $str to bail(), which prints it to
+# STDERR (when defined), cleans up and exits with status 1.
+sub help
+{
+ my $str = shift;
+
+ # single-quoted heredoc: the text is printed verbatim, nothing is interpolated
+ print <<'HELPME';
+
+USAGE
+ ./mafftash_premafft.pl -p [FILE]
+ ./mafftash_premafft.pl -o [FILE] -d [DIRECTORY]
+ ./mafftash_premafft.pl -p [FILE] -o [FILE] -d [DIRECTORY]
+
+
+PARAMETERS
+ -p [FILE]
+ FILE contains a list of PDBIDs (one entry per line); make sure that the PDBIDs are in the standard 5-character pdbid+chain naming format
+
+ -o [FILE] -d [DIRECTORY]
+ FILE contains a list of IDs from your own structure/pdb files (one entry per line)
+ for each ID in the list make sure that a corresponding structure file (same ID with .pdb extension) is stored in DIRECTORY
+
+ -h [HATFILE]
+ save the output hat3 file in HATFILE; if not set, the output is written to a file named 'hat3' in your current directory
+
+ -i [INSTRFILE]
+ save the output instr file in INSTRFILE; if not set, the output is written to a file named 'instr' in your current directory
+
+HELPME
+
+ &bail($str);
+}
+
+
+
--- /dev/null
+#include "mltaln.h"
+
+#define DEBUG 0
+#define IODEBUG 0
+#define SCOREOUT 0
+
+#define GLOBAL 0
+
+#define END_OF_VEC -1
+
+// Tool-wide settings; each one is (re)assigned by arguments() as noted.
+// nadd: value of option -I (0 when the option is absent)
+int nadd;
+// thresholdtorev: value of option -t; main() compares the relative score
+// difference (reverse - forward) / max against it
+double thresholdtorev;
+// dodp: set to 1 by option -d; selects the alignment-score code paths
+// instead of the 6-mer counting ones
+int dodp;
+// addfragment: set to 1 by option -F
+int addfragment;
+// mode: first character of the argument of option -o
+int mode = '2';
+// reflim: value of option -r
+int reflim = 1000;
+// contrastsort: cleared by option -c; when non-zero main() orders the
+// sequences with makecontrastorder()/makecontrastorder6mer()
+int contrastsort = 1;
+
+// Argument bundle for directionthread().
+typedef struct _thread_arg
+{
+ int iend; // number of entries to process; work items are j = 0 .. iend-1
+ char **seq; // sequences, indexed through map[]
+ int *map; // map[j] is the index into seq[] / spointt[] for work item j
+ char *tmpseq; // sequence that every seq[map[j]] is scored against (dodp mode)
+ int *res; // output: res[j] receives the score of work item j
+ int **spointt; // point tables, passed to localcommonsextet_p2() (non-dodp mode)
+ short *table1; // composition table passed to localcommonsextet_p2()
+ int iq; // not read by directionthread() (its use there is commented out)
+#ifdef enablemultithread
+ int *jshare; // shared work counter; guarded by mutex_counter
+ int thread_no; // index of the thread
+ pthread_mutex_t *mutex_counter; // protects *jshare
+#endif
+} thread_arg_t;
+
+// Argument bundle for selfdpthread().
+typedef struct _selfdpthread_arg
+{
+ int iend; // number of sequences to process; work items are j = 0 .. iend-1
+ char **seq; // input sequences
+ double *res; // output: res[j] receives the score difference for seq[j]
+#ifdef enablemultithread
+ int *jshare; // shared work counter; guarded by mutex_counter
+ int thread_no; // index of the thread, used in the progress message
+ pthread_mutex_t *mutex_counter; // protects *jshare
+#endif
+} selfdpthread_arg_t;
+
+// One sortable entry of the contrast ordering: the original position of a
+// sequence and the score difference it is sorted by (see compfunc()).
+typedef struct _contrast
+{
+ int pos; // index of the sequence before sorting
+ double dif; // sort key
+} contrastarr;
+
+// Worker for makecontrastorder(). For every j in [0, iend) it stores in res[j]
+// the score of seq[j] against itself minus the score of seq[j] against the
+// copy produced by sreverse(). With threads enabled (nthread != 0) the index j
+// is taken from the shared counter *jshare under the mutex; otherwise the
+// indices are processed sequentially. arg points to a selfdpthread_arg_t.
+// Always returns NULL.
+static void *selfdpthread( void *arg )
+{
+ selfdpthread_arg_t *targ = (selfdpthread_arg_t *)arg;
+ int iend = targ->iend;
+ char **seq = targ->seq;
+ double *res = targ->res;
+#ifdef enablemultithread
+ int thread_no = targ->thread_no;
+ int *jshare = targ->jshare;
+#endif
+ int j;
+ char **revseq;
+
+ // per-thread buffer for the sreverse() output
+ revseq = AllocateCharMtx( 1, nlenmax+1 );
+
+ j = -1;
+ while( 1 )
+ {
+#ifdef enablemultithread
+ if( nthread )
+ {
+ // claim the next index; the counter is read and advanced under the lock
+ pthread_mutex_lock( targ->mutex_counter );
+ j = *jshare;
+ if( j%100 == 0 ) reporterr( "%d / %d (thread %d) \r", j, iend, thread_no );
+ if( j == iend )
+ {
+ pthread_mutex_unlock( targ->mutex_counter );
+ break;
+ }
+ ++(*jshare);
+ pthread_mutex_unlock( targ->mutex_counter );
+ }
+ else
+#endif
+ {
+ j++;
+ if( j%100 == 0 ) reporterr( "%d / %d \r", j, iend );
+ if( j == iend )
+ {
+ break;
+ }
+ }
+
+ sreverse( revseq[0], seq[j] );
+#if GLOBAL
+ res[j] = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, seq+j, seq+j, 0 );
+ res[j] -= G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, seq+j, revseq, 0 );
+#else
+ res[j] = L__align11_noalign( n_dis_consweight_multi, seq+j, seq+j );
+ res[j] -= L__align11_noalign( n_dis_consweight_multi, seq+j, revseq );
+#endif
+ }
+
+ // NOTE(review): the calls with 0 / NULL arguments presumably release the
+ // internal work buffers of these routines - confirm in their definitions
+ creverse( 0 );
+ FreeCharMtx( revseq );
+#if GLOBAL
+ G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 );
+#else
+ L__align11_noalign( NULL, NULL, NULL );
+#endif
+ return( NULL );
+}
+
+#if 0
+// Disabled helper (compiled out by the surrounding #if 0).
+// For each of the first outsize positions i, swaps ary[i] with the element at
+// a random index in [0, size).
+// NOTE(review): picking j from the whole range on every step does not give a
+// uniform shuffle - revisit before re-enabling.
+static void partshuffle( int size, int outsize, int *ary )
+{
+ int i;
+
+// reporterr( "ary before shuffle = \n" );
+ // for(i=0;i<size;i++) reporterr( "%d ", ary[i] );
+// reporterr( "\n" );
+
+ for(i=0;i<outsize;i++)
+ {
+ int j = rand()%size;
+ int t = ary[i];
+ ary[i] = ary[j];
+ ary[j] = t;
+ }
+
+// reporterr( "ary after shuffle = \n" );
+ // for(i=0;i<outsize;i++) reporterr( "%d ", ary[i] );
+// reporterr( "|" );
+ // for(i=outsize;i<size;i++) reporterr( "%d ", ary[i] );
+// reporterr( "\n" );
+}
+#endif
+
+// Parse the command line into the global settings.
+// Defaults are assigned first. Every argument starting with '-' is then read
+// character by character: flag letters can be combined in one argument, while
+// a letter that takes a value consumes the following argument and ends the
+// current one. A single remaining non-option argument is stored in cut.
+// Exits with status 1 on an illegal option, on leftover arguments, or on the
+// tbitr/outgap conflict checked at the end.
+// NOTE(review): options that take a value do not check that a following
+// argument exists before reading it.
+void arguments( int argc, char *argv[] )
+{
+ int c;
+
+ nthread = 1;
+ inputfile = NULL;
+ nadd = 0;
+ dodp = 0;
+ // NOTE(review): the first assignment is immediately overwritten; the
+ // effective default is 'm'
+ alg = 'a';
+ alg = 'm';
+ dorp = NOTSPECIFIED;
+ fmodel = 0;
+// ppenalty = (int)( -2.0 * 1000 - 0.5 );
+// ppenalty_ex = (int)( -0.1 * 1000 - 0.5 );
+// poffset = (int)( 0.1 * 1000 - 0.5 );
+ ppenalty = NOTSPECIFIED;
+ ppenalty_ex = NOTSPECIFIED;
+ poffset = NOTSPECIFIED;
+ kimuraR = 2;
+ pamN = 200;
+ thresholdtorev = 0.0;
+ addfragment = 0;
+
+
+ while( --argc > 0 && (*++argv)[0] == '-' )
+ {
+ while ( (c = *++argv[0]) )
+ {
+ switch( c )
+ {
+ // options with a value: the value is the next argument
+ case 'i':
+ inputfile = *++argv;
+ fprintf( stderr, "inputfile = %s\n", inputfile );
+ --argc;
+ goto nextoption;
+ case 'I':
+ nadd = myatoi( *++argv );
+ fprintf( stderr, "nadd = %d\n", nadd );
+ --argc;
+ goto nextoption;
+ case 'C':
+ nthread = myatoi( *++argv );
+ fprintf( stderr, "nthread = %d\n", nthread );
+ --argc;
+ goto nextoption;
+ // -f, -g, -h: the value is scaled by 1000 and stored as an integer
+ case 'f':
+ ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
+// fprintf( stderr, "ppenalty = %d\n", ppenalty );
+ --argc;
+ goto nextoption;
+ case 'g':
+ ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
+ fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
+ --argc;
+ goto nextoption;
+ case 'h':
+ poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
+// fprintf( stderr, "poffset = %d\n", poffset );
+ --argc;
+ goto nextoption;
+ case 'k':
+ kimuraR = myatoi( *++argv );
+ fprintf( stderr, "kappa = %d\n", kimuraR );
+ --argc;
+ goto nextoption;
+ case 'j':
+ pamN = myatoi( *++argv );
+ scoremtx = 0;
+ TMorJTT = JTT;
+ fprintf( stderr, "jtt/kimura %d\n", pamN );
+ --argc;
+ goto nextoption;
+ case 't':
+ thresholdtorev = atof( *++argv );
+ fprintf( stderr, "thresholdtorev = %f\n", thresholdtorev );
+ --argc;
+ goto nextoption;
+ case 'o':
+ // only the first character of the value is used
+ mode = *(*++argv);
+ fprintf( stderr, "mode = %c\n", mode );
+ --argc;
+ goto nextoption;
+ case 'r':
+ reflim = myatoi(*++argv);
+ fprintf( stderr, "reflim = %d\n", reflim );
+ --argc;
+ goto nextoption;
+ // plain flags: parsing continues with the next character
+ case 'c':
+ contrastsort = 0;
+ break;
+ case 'd':
+ dodp = 1;
+ break;
+ case 'F':
+ addfragment = 1;
+ break;
+#if 1
+ case 'a':
+ fmodel = 1;
+ break;
+#endif
+ case 'S':
+ alg = 'S';
+ break;
+ case 'M':
+ alg = 'M';
+ break;
+ case 'm':
+ alg = 'm';
+ break;
+ case 'G':
+ alg = 'G';
+ break;
+ case 'D':
+ dorp = 'd';
+ break;
+ case 'P':
+ dorp = 'p';
+ break;
+ default:
+ fprintf( stderr, "illegal option %c\n", c );
+ argc = 0;
+ break;
+ }
+ }
+ nextoption:
+ ;
+ }
+ // one trailing non-option argument is taken as the value of cut
+ if( argc == 1 )
+ {
+ cut = atof( (*argv) );
+ argc--;
+ }
+ if( argc != 0 )
+ {
+ fprintf( stderr, "options: Check source file !\n" );
+ exit( 1 );
+ }
+ if( tbitr == 1 && outgap == 0 )
+ {
+ fprintf( stderr, "conflicting options : o, m or u\n" );
+ exit( 1 );
+ }
+}
+
+
+
+
+
+// Translate seq into group codes through amino_grp[], keeping only codes
+// below 4. The codes are written to grp and terminated with END_OF_VEC.
+// When fewer than 6 codes were kept, the first element of grp is set to -1
+// to mark the sequence as too short.
+void seq_grp_nuc( int *grp, char *seq )
+{
+	int *head = grp;
+	int code;
+
+	for( ; *seq; seq++ )
+	{
+		code = amino_grp[(int)*seq];
+		if( code < 4 )
+			*grp++ = code;
+	}
+	*grp = END_OF_VEC;
+
+	if( grp - head < 6 )
+		*head = -1;
+}
+
+// Translate seq into group codes through amino_grp[], keeping only codes
+// below 6. The codes are written to grp and terminated with END_OF_VEC.
+// When fewer than 6 codes were kept, the first element of grp is set to -1
+// to mark the sequence as too short.
+void seq_grp( int *grp, char *seq )
+{
+	int *head = grp;
+	int code;
+
+	for( ; *seq; seq++ )
+	{
+		code = amino_grp[(int)*seq];
+		if( code < 6 )
+			*grp++ = code;
+	}
+	*grp = END_OF_VEC;
+
+	if( grp - head < 6 )
+		*head = -1;
+}
+
+// Count the entries of pointt: table[p] is incremented once for every value p
+// found before the END_OF_VEC terminator.
+void makecompositiontable_p( short *table, int *pointt )
+{
+	for( ; *pointt != END_OF_VEC; pointt++ )
+		table[*pointt]++;
+}
+
+
+// Convert the group-code vector n (terminated by END_OF_VEC) into base-4
+// 6-mer codes: one code per window of six consecutive elements, written to
+// pointt and terminated with END_OF_VEC.
+// A vector starting with -1 (the too-short marker) yields just -1.
+void makepointtable_nuc( int *pointt, int *n )
+{
+	int *oldest = n;	// element that leaves the window on the next shift
+	int point = 0;
+	int k;
+
+	if( *n == -1 )
+	{
+		*pointt = -1;
+		return;
+	}
+
+	// code of the first window: its six elements read as base-4 digits
+	for( k=0; k<6; k++ )
+		point = point * 4 + *n++;
+	*pointt++ = point;
+
+	// slide the window: remove the leading digit, shift, append the new one
+	for( ; *n != END_OF_VEC; n++ )
+	{
+		point = ( point - *oldest++ * 1024 ) * 4 + *n;
+		*pointt++ = point;
+	}
+	*pointt = END_OF_VEC;
+}
+
+// Convert the group-code vector n (terminated by END_OF_VEC) into base-6
+// 6-mer codes: one code per window of six consecutive elements, written to
+// pointt and terminated with END_OF_VEC.
+// A vector starting with -1 (the too-short marker) yields just -1.
+void makepointtable( int *pointt, int *n )
+{
+	int *oldest = n;	// element that leaves the window on the next shift
+	int point = 0;
+	int k;
+
+	if( *n == -1 )
+	{
+		*pointt = -1;
+		return;
+	}
+
+	// code of the first window: its six elements read as base-6 digits
+	for( k=0; k<6; k++ )
+		point = point * 6 + *n++;
+	*pointt++ = point;
+
+	// slide the window: remove the leading digit, shift, append the new one
+	for( ; *n != END_OF_VEC; n++ )
+	{
+		point = ( point - *oldest++ * 7776 ) * 6 + *n;
+		*pointt++ = point;
+	}
+	*pointt = END_OF_VEC;
+}
+
+// Count the 6-mers shared between a composition table and a point list.
+// table:  per-code occurrence counts (see makecompositiontable_p()).
+// pointt: END_OF_VEC-terminated list of 6-mer codes; a list starting with -1
+//         yields 0.
+// Every code of pointt contributes 1 as long as it has not yet been seen more
+// often in pointt than table records for it.
+// Exits through ErrorExit() when the work table cannot be allocated.
+static int localcommonsextet_p2( short *table, int *pointt )
+{
+	int value = 0;
+	int point;
+	short *memo;
+
+	if( *pointt == -1 )
+		return( 0 );
+
+	// memo[p] counts how often code p has been seen so far in pointt
+	memo = (short *)calloc( tsize, sizeof( short ) );
+	if( !memo ) ErrorExit( "Cannot allocate memo\n" );
+
+	while( ( point = *pointt++ ) != END_OF_VEC )
+	{
+		if( memo[point]++ < table[point] )
+			value++;
+	}
+
+	// memo is released right away, so there is no need to record the touched
+	// entries and zero them first
+	free( memo );
+	return( value );
+}
+
+// qsort() comparator for contrastarr entries: descending order of dif.
+// The doubles are compared directly; returning their difference would
+// truncate it to int, so differences below 1 would compare as equal and very
+// large ones would overflow.
+static int compfunc( const void *a, const void *b )
+{
+	double da = ((const contrastarr *)a)->dif;
+	double db = ((const contrastarr *)b)->dif;
+
+	return ( db > da ) - ( db < da );
+}
+
+// Order sequences by 6-mer "contrast".
+// For each i in [0, iend) the key is the number of 6-mers pointt[i] shares
+// with its own composition table minus the number it shares with the
+// composition table of pointt_rev[i]. order[] receives the indices sorted by
+// descending key, each offset by shift.
+// seq is not used; it is kept so that the signature matches the call site.
+// Exits through ErrorExit() when an allocation fails.
+static void makecontrastorder6mer( int *order, int **pointt, int **pointt_rev, char **seq, int iend, int shift )
+{
+	int i;
+	contrastarr *arr;
+	short *table;
+
+	arr = calloc( iend, sizeof( contrastarr ) );
+	// calloc may legitimately return NULL for a zero-sized request
+	if( !arr && iend > 0 ) ErrorExit( "Cannot allocate arr\n" );
+
+	for( i=0; i<iend; i++ )
+	{
+		if( i % 100 == 1 ) reporterr( "%d \r", i );
+
+		// 6-mers shared with the sequence's own composition
+		table = (short *)calloc( tsize, sizeof( short ) );
+		if( !table ) ErrorExit( "Cannot allocate table1\n" );
+		makecompositiontable_p( table, pointt[i] );
+		arr[i].dif = localcommonsextet_p2( table, pointt[i] );
+		free( table );
+
+		// minus the 6-mers shared with the reversed sequence's composition
+		table = (short *)calloc( tsize, sizeof( short ) );
+		if( !table ) ErrorExit( "Cannot allocate table1_rev\n" );
+		makecompositiontable_p( table, pointt_rev[i] );
+		arr[i].dif -= localcommonsextet_p2( table, pointt[i] );
+		free( table );
+
+		arr[i].pos = i;
+	}
+
+	qsort( arr, iend, sizeof( contrastarr ), compfunc );
+
+	for( i=0; i<iend; i++ )
+		order[i] = arr[i].pos + shift;
+
+	free( arr );
+}
+// Order sequences by alignment-score "contrast".
+// selfdpthread() computes one score difference per sequence in seq[0..iend-1];
+// order[] receives the indices sorted with compfunc(), each offset by shift.
+// With threads enabled (nthread != 0) the work is shared by nthread threads
+// through a mutex-protected counter; otherwise selfdpthread() is called
+// directly.
+// NOTE(review): the results of the calloc() calls are not checked.
+static void makecontrastorder( int *order, char **seq, int iend, int shift )
+{
+ int i;
+ double *res;
+ contrastarr *arr;
+
+ arr = calloc( iend, sizeof( contrastarr ) );
+ res = calloc( iend, sizeof( double ) );
+
+#ifdef enablemultithread
+ if( nthread )
+ {
+ int j;
+ pthread_t *handle;
+ pthread_mutex_t mutex_counter;
+ selfdpthread_arg_t *targ;
+ int *jsharept;
+
+ targ = calloc( nthread, sizeof( selfdpthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex_counter, NULL );
+ // shared index of the next sequence to process
+ jsharept = calloc( 1, sizeof(int) );
+ *jsharept = 0;
+
+ // all threads share seq, res, the counter and the mutex
+ for( j=0; j<nthread; j++ )
+ {
+ targ[j].iend = iend;
+ targ[j].seq = seq;
+ targ[j].res = res;
+ targ[j].jshare = jsharept;
+ targ[j].mutex_counter = &mutex_counter;
+ targ[j].thread_no = j;
+ pthread_create( handle+j, NULL, selfdpthread, (void *)(targ+j) );
+ }
+ // wait for every worker before the results are read
+ for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
+ pthread_mutex_destroy( &mutex_counter );
+ free( handle );
+ free( targ );
+ free( jsharept );
+ }
+ else
+#endif
+ {
+ // single-threaded: run the worker in the calling thread
+ selfdpthread_arg_t *targ;
+ targ = calloc( 1, sizeof( selfdpthread_arg_t ) );
+ targ[0].iend = iend;
+ targ[0].seq = seq;
+ targ[0].res = res;
+ selfdpthread( targ );
+ free( targ );
+ }
+
+ // pair every score with its original index so the index survives sorting
+ for( i=0; i<iend; i++ )
+ {
+ arr[i].pos = i;
+ arr[i].dif = res[i];
+ }
+
+ qsort( arr, iend, sizeof( contrastarr ), compfunc );
+
+ for( i=0; i<iend; i++ )
+ order[i] = arr[i].pos + shift;
+
+// for( i=0; i<iend; i++ ) reporterr( "%f\n", arr[i].dif );
+// reporterr( "highest contrast, %s\n", seq[order[0]] );
+// reporterr( "lowest contrast, %s\n", seq[order[iend-1]] );
+
+ free( arr );
+ free( res );
+
+}
+
+
+// Worker that scores one query against a set of sequences.
+// For every j in [0, iend) it stores in res[j] either the alignment score of
+// tmpseq against seq[map[j]] (dodp set) or the 6-mer count
+// localcommonsextet_p2( table1, spointt[map[j]] ) (dodp not set).
+// With threads enabled (nthread != 0) the index j is taken from the shared
+// counter *jshare under the mutex; otherwise the indices are processed
+// sequentially. arg points to a thread_arg_t. Always returns NULL.
+static void *directionthread( void *arg )
+{
+ thread_arg_t *targ = (thread_arg_t *)arg;
+ int iend = targ->iend;
+ char **seq = targ->seq;
+ int *map = targ->map;
+ char *tmpseq = targ->tmpseq;
+ int *res = targ->res;
+ int **spointt = targ->spointt;
+ short *table1 = targ->table1;
+// int iq = targ->iq;
+#ifdef enablemultithread
+// int thread_no = targ->thread_no;
+ int *jshare = targ->jshare;
+#endif
+ int j;
+ char **mseq1, **mseq2;
+
+
+ if( dodp ) // nakuserukamo
+ {
+ // one-row holders only: the row pointers are pointed at existing
+ // sequences below, no sequence data is copied
+ mseq1 = AllocateCharMtx( 1, 0 );
+ mseq2 = AllocateCharMtx( 1, 0 );
+ }
+
+ j = -1;
+ while( 1 )
+ {
+#ifdef enablemultithread
+ if( nthread )
+ {
+ // claim the next index; the counter is read and advanced under the lock
+ pthread_mutex_lock( targ->mutex_counter );
+ j = *jshare;
+ if( j == iend )
+ {
+ pthread_mutex_unlock( targ->mutex_counter );
+ break;
+ }
+ ++(*jshare);
+ pthread_mutex_unlock( targ->mutex_counter );
+ }
+ else
+#endif
+ {
+ j++;
+ if( j == iend )
+ {
+// if( iq%100==1 ) fprintf( stderr, "\r %d / %d \r", iq, njob );
+ break;
+ }
+ }
+
+
+ if( dodp )
+ {
+// strcpy( mseq1[0], tmpseq );
+// strcpy( mseq2[0], seq[j] );
+ mseq1[0] = tmpseq;
+ mseq2[0] = seq[map[j]];
+#if GLOBAL
+ res[j] = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, 0 );
+#else
+ res[j] = L__align11_noalign( n_dis_consweight_multi, mseq1, mseq2 );
+#endif
+ }
+ else
+ {
+// reporterr( "\n\nj=%d, map[j]=%d\n\n", j, map[j] );
+ res[j] = localcommonsextet_p2( table1, spointt[map[j]] );
+ }
+ }
+ if( dodp ) // nakuserukamo
+ {
+ // only the holders are freed here; the rows point at tmpseq / seq[],
+ // which this function does not own
+ free( mseq1 );
+ free( mseq2 );
+ // NOTE(review): the call with NULL arguments presumably releases the
+ // routine's internal work buffers - confirm in its definition
+#if GLOBAL
+ G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 );
+#else
+ L__align11_noalign( NULL, NULL, NULL );
+#endif
+ }
+// else
+// if( nthread ) // inthread == 0 no toki free suru to, error. nazeda
+// localcommonsextet_p( NULL, NULL );
+ return( NULL );
+}
+
+int main( int argc, char *argv[] )
+{
+ static int *nlen;
+ static int *nogaplen;
+ static char **name, **seq;
+ int i, j, istart, iend, ic;
+ FILE *infp;
+// FILE *adfp;
+ char c;
+
+ int *grpseq;
+ char *tmpseq, *revseq;
+ int **pointt, **pointt_rev, **spointt;
+ double res_forward, res_reverse, res_max;
+ int ires, mres, mres2;
+ int *res, *resr, *resf;
+ int *map;
+ static short *table1, *table1_rev;
+ static char **mseq1f, **mseq1r, **mseq2;
+ int *contrastorder;
+
+ arguments( argc, argv );
+#ifndef enablemultithread
+ nthread = 0;
+#endif
+
+ if( inputfile )
+ {
+ infp = fopen( inputfile, "r" );
+ if( !infp )
+ {
+ fprintf( stderr, "Cannot open %s\n", inputfile );
+ exit( 1 );
+ }
+ }
+ else
+ infp = stdin;
+
+ getnumlen( infp );
+ rewind( infp );
+
+ if( alg == 'a' )
+ {
+ if( nlenmax < 10000 )
+ alg = 'G';
+ else
+ alg = 'S';
+ }
+
+ seq = AllocateCharMtx( njob, nlenmax*1+1 );
+
+#if 0
+ Read( name, nlen, seq );
+ readData( infp, name, nlen, seq );
+#else
+ name = AllocateCharMtx( njob, B+1 );
+ nlen = AllocateIntVec( njob );
+ nogaplen = AllocateIntVec( njob );
+ readData_pointer( infp, name, nlen, seq );
+ fclose( infp );
+
+ if( dorp != 'd' )
+ {
+ fprintf( stderr, "Not necessary!\n" );
+ for( i=0; i<njob; i++ )
+ fprintf( stdout, "_F_%-10.10s\n", name[i]+1 );
+ exit( 1 );
+ }
+#endif
+
+ constants( njob, seq );
+
+
+#if 0
+ fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
+#endif
+
+ initSignalSM();
+
+ initFiles();
+
+ c = seqcheck( seq );
+ if( c )
+ {
+ fprintf( stderr, "Illegal character %c\n", c );
+ exit( 1 );
+ }
+
+ fprintf( stderr, "\n" );
+ if( alg == 'G' ) // dp to the first sequence
+ {
+ mseq1f = AllocateCharMtx( 1, nlenmax+nlenmax );
+ mseq1r = AllocateCharMtx( 1, nlenmax+nlenmax );
+ mseq2 = AllocateCharMtx( 1, nlenmax+nlenmax );
+ tmpseq = AllocateCharVec( MAX( nlenmax, B ) +1 );
+
+ gappick0( mseq1f[0], seq[0] );
+ sreverse( mseq1r[0], mseq1f[0] );
+ strcpy( seq[0], mseq1f[0] );
+
+ if( nadd )
+ istart = njob - nadd;
+ else
+ istart = 1;
+
+ fprintf( stderr, "\n" );
+
+ for( i=0; i<istart; i++ )
+ {
+ gappick0( tmpseq, seq[i] );
+ strcpy( seq[i], tmpseq );
+ strcpy( tmpseq, name[i] );
+ strcpy( name[i], "_F_" );
+ strncpy( name[i]+3, tmpseq+1, 10 );
+ name[i][13] = 0;
+ }
+ for( i=istart; i<njob; i++ )
+ {
+ gappick0( mseq2[0], seq[i] );
+
+#if GLOBAL
+ res_forward = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1f, mseq2, 0 );
+ res_reverse = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1r, mseq2, 0 );
+#else
+ res_forward = L__align11_noalign( n_dis_consweight_multi, mseq1f, mseq2 );
+ res_reverse = L__align11_noalign( n_dis_consweight_multi, mseq1r, mseq2 );
+#endif
+#if 0
+
+ strcpy( mseq2[0], seq[i] );
+ strcpy( mseq1f[0], seq[0] );
+ res_forward = G__align11( n_dis_consweight_multi, mseq1f, mseq2, nlenmax*2, 0, 0 );
+ fprintf( stdout, "%s\n", mseq1f[0] );
+ fprintf( stdout, "%s\n", mseq2[0] );
+
+ strcpy( mseq2[0], seq[i] );
+ sreverse( mseq1r[0], seq[0] );
+ res_reverse = G__align11( n_dis_consweight_multi, mseq1r, mseq2, nlenmax*2, 0, 0 );
+ fprintf( stdout, "%s\n", mseq1r[0] );
+ fprintf( stdout, "%s\n", mseq2[0] );
+#endif
+
+// fprintf( stdout, "\nscore_for(%d,%d) = %f\n", 0, i, res_forward );
+// fprintf( stdout, "score_rev(%d,%d) = %f\n", 0, i, res_reverse );
+ res_max = MAX(res_reverse,res_forward);
+ if( (res_reverse-res_forward)/res_max > thresholdtorev ) // tekitou
+ {
+// fprintf( stderr, "REVERSE!!!\n" );
+ sreverse( seq[i], mseq2[0] );
+
+ strcpy( tmpseq, name[i] );
+ strcpy( name[i], "_R_" );
+ strncpy( name[i]+3, tmpseq+1, 10 );
+ name[i][13] = 0;
+ }
+ else
+ {
+ strcpy( seq[i], mseq2[0] );
+
+ strcpy( tmpseq, name[i] );
+ strcpy( name[i], "_F_" );
+ strncpy( name[i]+3, tmpseq+1, 10 );
+ name[i][13] = 0;
+ }
+ }
+ FreeCharMtx( mseq1f );
+ FreeCharMtx( mseq1r );
+ FreeCharMtx( mseq2 );
+ free( tmpseq );
+ }
+ else if( alg == 'm' )
+ {
+
+ if( dodp ) // nakuserukamo
+ {
+ mseq1f = AllocateCharMtx( 1, nlenmax+1);
+ mseq1r = AllocateCharMtx( 1, nlenmax+1 );
+ mseq2 = AllocateCharMtx( 1, nlenmax+1 );
+ }
+ else
+ {
+// nthread = 0; // heiretsu keisan no kouritsu ha warui node
+ spointt = AllocateIntMtx( njob, 0 );
+ pointt = AllocateIntMtx( njob, nlenmax+1 );
+ pointt_rev = AllocateIntMtx( njob, nlenmax+1 );
+ }
+ tmpseq = AllocateCharVec( MAX( nlenmax, B ) +1 );
+ revseq = AllocateCharVec( nlenmax+1 );
+ grpseq = AllocateIntVec( nlenmax+1 );
+ res = AllocateIntVec( njob );
+ resr = AllocateIntVec( njob );
+ resf = AllocateIntVec( njob );
+ map = AllocateIntVec( njob );
+ contrastorder = AllocateIntVec( njob );
+ if( dorp == 'd' ) tsize = (int)pow( 4, 6 );
+ else tsize = (int)pow( 6, 6 ); // iranai
+
+ maxl = 0;
+ for( i=0; i<njob; i++ )
+ {
+ gappick0( tmpseq, seq[i] );
+ nogaplen[i] = strlen( tmpseq );
+ if( nogaplen[i] > maxl ) maxl = nogaplen[i];
+ }
+
+ reporterr( "Step 1/2\n" );
+
+ if( !dodp )
+ {
+ if( nadd )
+ iend = njob - nadd;
+ else
+ iend = 0; // keisan shinai
+
+ for( i=0; i<iend; i++ )
+ {
+ gappick0( tmpseq, seq[i] );
+ strcpy( seq[i], tmpseq );
+ seq_grp_nuc( grpseq, tmpseq );
+ makepointtable_nuc( pointt[i], grpseq );
+ spointt[i] = pointt[i];
+ }
+
+ if( nadd )
+ istart = njob - nadd;
+ else
+ istart = 0;
+ for( i=istart; i<njob; i++ )
+ {
+
+ gappick0( tmpseq, seq[i] );
+ strcpy( seq[i], tmpseq );
+ sreverse( revseq, tmpseq );
+
+ seq_grp_nuc( grpseq, tmpseq );
+ makepointtable_nuc( pointt[i], grpseq );
+// makecompositiontable_p( table1, pointt[i] ); -> moto no basho ni modosu
+ seq_grp_nuc( grpseq, revseq );
+ makepointtable_nuc( pointt_rev[i], grpseq );
+// makecompositiontable_p( table1_rev, pointt_rev[i] ); -> moto no basho ni modosu
+ spointt[i] = pointt[i];
+
+
+// reporterr( "pointt[i] = %p\n", pointt[i] );
+// reporterr( "pointt[i][0] = %p\n", pointt[i][0] );
+
+ }
+ }
+
+
+ if( contrastsort ) // sukoshi chuui
+ {
+
+
+ if( nadd )
+ {
+ iend = njob-nadd;
+ for( i=0; i<iend; i++ ) contrastorder[i] = i;
+ istart = njob-nadd;
+ iend = nadd;
+ }
+ else
+ {
+ istart = 0;
+ iend = njob;
+ }
+
+ if( dodp )
+ makecontrastorder( contrastorder+istart, seq+istart, iend, istart );
+ else
+ makecontrastorder6mer( contrastorder+istart, pointt+istart, pointt_rev+istart, seq+istart, iend, istart );
+ }
+ else
+ {
+ for( i=0; i<njob; i++ ) contrastorder[i] = i;
+ }
+
+
+// reporterr( "contrastorder = \n" );
+// for( i=0; i<njob; i++ )
+// reporterr( "%d ", contrastorder[i] );
+// reporterr( "\n" );
+
+
+
+ if( nadd )
+ iend = njob - nadd;
+ else
+ iend = 1;
+ for( i=0; i<iend; i++ )
+ {
+ ic = contrastorder[i];
+// fprintf( stdout, "%d, SKIP\n", i );
+ gappick0( tmpseq, seq[ic] );
+ strcpy( seq[ic], tmpseq );
+// if( !nadd ) strcpy( seq[i], tmpseq ); // seq ha tsukawanaikara ii.
+
+#if 0 // -> makecontrastorder() no mae ni idou
+ if( !dodp )
+ {
+ seq_grp_nuc( grpseq, tmpseq );
+ makepointtable_nuc( pointt[ic], grpseq );
+ spointt[ic] = pointt[ic];
+ }
+#endif
+
+ strcpy( tmpseq, name[ic] );
+ strcpy( name[ic], "_F_" );
+ strncpy( name[ic]+3, tmpseq+1, 10 );
+ name[ic][13] = 0;
+ }
+
+ reporterr( "\n\nStep 2/2\n" );
+
+ if( nadd )
+ istart = njob - nadd;
+ else
+ istart = 1;
+ for( i=istart; i<njob; i++ )
+ {
+// fprintf( stderr, "\r %d / %d ", i, njob );
+ ic = contrastorder[i];
+ gappick0( tmpseq, seq[ic] );
+ strcpy( seq[ic], tmpseq );
+ sreverse( revseq, tmpseq );
+
+#if 0 // -> makecontrastorder() no mae ni idou
+ if( !dodp )
+ {
+ table1 = (short *)calloc( tsize, sizeof( short ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ table1_rev = (short *)calloc( tsize, sizeof( short ) );
+ if( !table1_rev ) ErrorExit( "Cannot allocate table1_rev\n" );
+ seq_grp_nuc( grpseq, tmpseq );
+ makepointtable_nuc( pointt[ic], grpseq );
+ makecompositiontable_p( table1, pointt[ic] );
+ seq_grp_nuc( grpseq, revseq );
+ makepointtable_nuc( pointt_rev[ic], grpseq );
+ makecompositiontable_p( table1_rev, pointt_rev[ic] );
+ }
+#else
+ if( !dodp )
+ {
+ table1 = (short *)calloc( tsize, sizeof( short ) );
+ if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
+ table1_rev = (short *)calloc( tsize, sizeof( short ) );
+ if( !table1_rev ) ErrorExit( "Cannot allocate table1_rev\n" );
+ makecompositiontable_p( table1, pointt[ic] );
+ makecompositiontable_p( table1_rev, pointt_rev[ic] );
+ }
+#endif
+
+ if( nadd && addfragment )
+ iend = njob-nadd;
+ else
+ iend = i;
+
+
+ if( iend > reflim )
+ {
+// reporterr( "iend = %d -> %d\n", iend, reflim );
+#if 0
+ for( j=0; j<iend; j++ ) map[j] = j;
+ partshuffle( iend, reflim, map );
+#else
+ for( j=0; j<iend; j++ ) map[j] = contrastorder[j];
+#endif
+ iend = reflim; // approximation
+ }
+ else
+ {
+#if 0
+ for( j=0; j<iend; j++ ) map[j] = j;
+#else
+ for( j=0; j<iend; j++ ) map[j] = contrastorder[j];
+#endif
+ }
+
+// reporterr( "reflim = %d, seq[%d] = %s\n", reflim, contrastorder[0], seq[contrastorder[0]] );
+
+#ifdef enablemultithread
+ if( nthread )
+ {
+ pthread_t *handle;
+ pthread_mutex_t mutex_counter;
+ thread_arg_t *targ;
+ int *jsharept;
+
+ targ = calloc( nthread, sizeof( thread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex_counter, NULL );
+ jsharept = calloc( 1, sizeof(int) );
+ *jsharept = 0;
+
+ if( i%100==1 ) fprintf( stderr, " %d / %d (%d threads) \r", i, njob, nthread );
+ for( j=0; j<nthread; j++ )
+ {
+ targ[j].iend = iend;
+ targ[j].map = map;
+ targ[j].seq = seq;
+ targ[j].tmpseq = tmpseq;
+ targ[j].res = resf;
+ targ[j].spointt = spointt;
+ targ[j].table1 = table1;
+ targ[j].jshare = jsharept;
+ targ[j].iq = i; // iranai
+ targ[j].mutex_counter = &mutex_counter;
+ targ[j].thread_no = j;
+ pthread_create( handle+j, NULL, directionthread, (void *)(targ+j) );
+ }
+ for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
+ pthread_mutex_destroy( &mutex_counter );
+ free( handle );
+ free( targ );
+ free( jsharept );
+ }
+ else
+#endif
+ {
+ thread_arg_t *targ;
+
+ if( i%100==1 ) fprintf( stderr, " %d / %d \r", i, njob );
+ targ = calloc( 1, sizeof( thread_arg_t ) );
+ targ[0].iend = iend;
+ targ[0].map = map;
+ targ[0].seq = seq;
+ targ[0].tmpseq = tmpseq;
+ targ[0].res = resf;
+ targ[0].spointt = spointt;
+ targ[0].table1 = table1;
+ targ[0].iq = i; // iranai
+ directionthread( targ );
+ free( targ );
+ }
+
+
+
+#ifdef enablemultithread
+ if( nthread )
+ {
+ pthread_t *handle;
+ pthread_mutex_t mutex_counter;
+ thread_arg_t *targ;
+ int *jsharept;
+
+ targ = calloc( nthread, sizeof( thread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex_counter, NULL );
+ jsharept = calloc( 1, sizeof(int) );
+ *jsharept = 0;
+
+ for( j=0; j<nthread; j++ )
+ {
+ targ[j].iend = iend;
+ targ[j].seq = seq;
+ targ[j].map = map;
+ targ[j].tmpseq = revseq;
+ targ[j].res = resr;
+ targ[j].spointt = spointt;
+ targ[j].table1 = table1_rev;
+ targ[j].jshare = jsharept;
+ targ[j].iq = i; // iranai
+ targ[j].mutex_counter = &mutex_counter;
+ targ[j].thread_no = j;
+ pthread_create( handle+j, NULL, directionthread, (void *)(targ+j) );
+ }
+ for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
+ pthread_mutex_destroy( &mutex_counter );
+ free( handle );
+ free( targ );
+ free( jsharept );
+ }
+ else
+#endif
+ {
+ thread_arg_t *targ;
+ targ = calloc( 1, sizeof( thread_arg_t ) );
+ targ[0].iend = iend;
+ targ[0].seq = seq;
+ targ[0].map = map;
+ targ[0].tmpseq = revseq;
+ targ[0].res = resr;
+ targ[0].spointt = spointt;
+ targ[0].table1 = table1_rev;
+ targ[0].iq = i; // iranai
+ directionthread( targ );
+ free( targ );
+ }
+
+ if( mode == '2' )
+ {
+ mres = mres2 = 0;
+ for( j=0; j<iend; j++ )
+ {
+ ires = resf[j];
+// fprintf( stdout, "ires (%d,%d) = %d\n", i, j, ires );
+// fflush( stdout );
+ if( ires>mres2 )
+ {
+ if( ires>mres )
+ {
+ mres2 = mres;
+ mres = ires;
+ }
+ else
+ mres2 = ires;
+ }
+ }
+ res_forward = (double)( mres + mres2 ) / 2;
+ mres = mres2 = 0;
+ for( j=0; j<iend; j++ )
+ {
+ ires = resr[j];
+ if( ires>mres2 )
+ {
+ if( ires>mres )
+ {
+ mres2 = mres;
+ mres = ires;
+ }
+ else
+ mres2 = ires;
+ }
+ }
+ res_reverse = (double)( mres + mres2 ) / 2;
+ res_max = MAX(res_reverse,res_forward);
+ }
+// reporterr( "i=%d, res_reverse = %f\n", i, res_reverse );
+ else if( mode == '1' )
+ {
+ res_reverse = 0.0;
+ for( j=0; j<iend; j++ ) if( res_reverse < (double)resr[j] ) res_reverse = (double)resr[j];
+ res_forward = 0.0;
+ for( j=0; j<iend; j++ ) if( res_forward < (double)resf[j] ) res_forward = (double)resf[j];
+ res_max = 1.0;
+ }
+
+ else if( mode == 'd' )
+ {
+ res_reverse = 0.0;
+ for( j=0; j<iend; j++ ) if( res_reverse < (double)(resr[j]-resf[j]) ) res_reverse = (double)(resr[j]-resf[j]);
+ res_forward = 0.0;
+ for( j=0; j<iend; j++ ) if( res_forward < (double)(resf[j]-resr[j]) ) res_forward = (double)(resf[j]-resr[j]);
+ res_max = 1.0;
+ }
+
+ else if( mode == 'a' )
+ {
+ res_reverse = 0.0;
+ for( j=0; j<iend; j++ ) res_reverse += (double)resr[j];
+ res_reverse /= (double)iend;
+ res_forward = 0.0;
+ for( j=0; j<iend; j++ ) res_forward += (double)resf[j];
+ res_forward /= (double)iend;
+ res_max = 1.0;
+ }
+ else
+ {
+ reporterr( "Unknown mode!\n" );
+ exit( 1 );
+ }
+
+
+ if( (res_reverse>res_forward) ) // tekitou
+// if( (res_reverse-res_forward)/res_max > thresholdtorev ) // tekitou
+ {
+ strcpy( seq[ic], revseq );
+
+ strcpy( tmpseq, name[ic] );
+ strcpy( name[ic], "_R_" );
+ strncpy( name[ic]+3, tmpseq+1, 10 );
+ name[ic][13] = 0;
+ if( !dodp ) spointt[ic] = pointt_rev[ic];
+ }
+ else
+ {
+ strcpy( tmpseq, name[ic] );
+ strcpy( name[ic], "_F_" );
+ strncpy( name[ic]+3, tmpseq+1, 10 );
+ name[ic][13] = 0;
+ if( !dodp ) spointt[ic] = pointt[ic];
+ }
+
+ if( !dodp )
+ {
+ free( table1 );
+ free( table1_rev );
+ }
+ }
+
+ if( name[0][1] == 'R' )
+ {
+ for( j=0; j<njob; j++ )
+ {
+ if( name[j][1] == 'R' )
+ name[j][1] = 'F';
+ else
+ name[j][1] = 'R';
+ }
+ }
+
+ creverse( 0 );
+ free( tmpseq );
+ free( revseq );
+ free( grpseq );
+ free( res );
+ free( resr );
+ free( resf );
+ free( map );
+ free( nlen );
+ free( nogaplen );
+ free( contrastorder );
+ if( dodp )
+ {
+ FreeCharMtx( mseq1f );
+ FreeCharMtx( mseq1r );
+ FreeCharMtx( mseq2 );
+ }
+ else
+ {
+ FreeIntMtx( pointt );
+ FreeIntMtx( pointt_rev );
+ free( spointt );
+ }
+ }
+ else
+ {
+ fprintf( stderr, "Unknown alg %c\n", alg );
+ exit( 1 );
+ }
+// writeData_pointer( stdout, njob, name, nlen, seq );
+ for( i=0; i<njob; i++ )
+ {
+// fprintf( stdout, ">%s\n", name[i] );
+// fprintf( stdout, "%s\n", seq[i] );
+ fprintf( stdout, "%s\n", name[i] );
+ }
+
+ FreeCharMtx( seq );
+ FreeCharMtx( name );
+ freeconstants();
+ closeFiles();
+
+ fprintf( stderr, "\n" );
+ SHOWVERSION;
+ return( 0 );
+}
+
--- /dev/null
+#!/bin/env ruby
+require 'getopts'
+
+seedoffset = 0
+
+if getopts( "s:" ) == nil || ARGV.length == 0 || $OPT_h then
+ puts "Usage: #{$0} [-s number_of_seeds] input_files"
+ exit
+end
+
+if $OPT_s
+ seedoffset = $OPT_s.to_i
+end
+
+
+files = ARGV
+
+
+num = seedoffset + 1
+for file in files
+ output = ""
+ STDERR.puts file
+ fp = File.open( file, "r" )
+ while line = fp.gets
+ if line =~ /^>/ then
+ output += " " + num.to_s
+ num += 1
+ end
+ end
+ fp.close
+ puts output + " # " + file
+end
int *pairnum;
int i;
int left, right;
- float prob;
+ double prob;
pairnum = (int *)calloc( length, sizeof( int ) );
for( i=0; i<length; i++ ) pairnum[i] = 0;
fgets( gett, 999, fp );
if( feof( fp ) ) break;
if( gett[0] == '>' ) continue;
- sscanf( gett, "%d %d %f", &left, &right, &prob );
+ sscanf( gett, "%d %d %lf", &left, &right, &prob );
if( prob < 0.01 ) continue; // mxscarna to mafft ryoho ni eikyou
//fprintf( stderr, "gett = %s\n", gett );
if( i == njob )
{
pthread_mutex_unlock( targ->mutex );
- return( NULL );
+// return( NULL );
+ break;
}
*jobpospt = i+1;
pthread_mutex_unlock( targ->mutex );
+ commongappick_record( 1, nogap+i, gapmap[i] );
+ if( strlen( nogap[i] ) == 0 ) continue;
sprintf( dirname, "_%d", i );
sprintf( com, "rm -rf %s", dirname );
system( com );
fprintf( stderr, "%d / %d (by thread %4d)\n", i+1, njob, thread_no );
- commongappick_record( 1, nogap+i, gapmap[i] );
sprintf( com, "%s/_mccaskillinorg", dirname );
infp = fopen( com, "w" );
// fprintf( infp, ">in\n%s\n", nogap[i] );
sprintf( com, "tr -d '\\r' < %s/_mccaskillinorg > %s/_mccaskillin", dirname, dirname );
system( com ); // for cygwin, wakaran
- sprintf( com, "cd %s; %s/mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea );
+ if( alg == 'G' )
+ sprintf( com, "cd %s; %s/dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", dirname, whereismccaskillmea );
+ else
+ sprintf( com, "cd %s; %s/mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", dirname, whereismccaskillmea );
res = system( com );
if( res )
}
free( dirname );
free( com );
+ return( NULL );
}
#endif
kimuraR = NOTSPECIFIED;
pamN = NOTSPECIFIED;
whereismccaskillmea = NULL;
+ alg = 's';
while( --argc > 0 && (*++argv)[0] == '-' )
{
--argc;
goto nextoption;
case 'C':
- nthread = atoi( *++argv );
+ nthread = myatoi( *++argv );
fprintf( stderr, "nthread = %d\n", nthread );
--argc;
goto nextoption;
+ case 's':
+ alg = 's'; // use scarna; default
+ break;
+ case 'G':
+ alg = 'G'; // use dafs, instead of scarna
+ break;
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
RNApair *pairprobpt;
RNApair *pt;
int *alnpairnum;
- float prob;
+ double prob;
int adpos;
arguments( argc, argv );
constants( njob, seq );
- fprintf( stderr, "Running mxscarna with the mccaskill_mea mode.\n" );
+ if( alg == 'G' )
+ fprintf( stderr, "Running DAFS (Sato et al. 2012; http://www.ncrna.org/).\n" );
+ else
+ fprintf( stderr, "Running mxscarna with the mccaskill_mea mode.\n" );
#ifdef enablemultithread
if( nthread > 0 )
{
}
pthread_mutex_destroy( &mutex );
+ free( handle );
+ free( targ );
+
for( i=0; i<njob; i++ )
{
{
fprintf( stderr, "%d / %d\n", i+1, njob );
commongappick_record( 1, nogap+i, gapmap[i] );
+ if( strlen( nogap[i] ) == 0 )
+ {
+ fprintf( stdout, ">%d\n", i );
+ continue;
+ }
+
infp = fopen( "_mccaskillinorg", "w" );
// fprintf( infp, ">in\n%s\n", nogap[i] );
fprintf( infp, ">in\n" );
fclose( infp );
system( "tr -d '\\r' < _mccaskillinorg > _mccaskillin" ); // for cygwin, wakaran
- sprintf( com, "env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", whereismccaskillmea );
+ if( alg == 'G' )
+ sprintf( com, "env PATH=%s dafs --mafft-out _mccaskillout _mccaskillin > _dum1 2>_dum", whereismccaskillmea );
+ else
+ sprintf( com, "env PATH=%s mxscarnamod -m -writebpp _mccaskillin > _mccaskillout 2>_dum", whereismccaskillmea );
res = system( com );
if( res )
for( j=0; j<nlenmax; j++ ) free( alnpairprob[j] );
free( alnpairprob );
free( alnpairnum );
+ free( order );
+ free( nlen );
+ FreeCharMtx( seq );
+ FreeCharMtx( nogap );
+ FreeCharMtx( name );
+ FreeIntMtx( gapmap );
+ freeconstants();
fprintf( stderr, "%d thread(s)\n", nthread );
return( 0 );
for( i=0; i<nlenmax; i++ ) for( pairprobpt=alnpairprob[i]; pairprobpt->bestpos!=-1; pairprobpt++ )
{
- pairprobpt->bestscore /= (float)njob;
+ pairprobpt->bestscore /= (double)njob;
left = i;
right = pairprobpt->bestpos;
prob = pairprobpt->bestscore;
--- /dev/null
+#!/usr/bin/env bash
+export PATH=/home/mingw32/mingw32/bin:$PATH
+export C_INCLUDE_PATH=/home/mingw32/mingw32/include
+export LIBRARY_PATH=/home/mingw32/mingw32/lib
+make clean
+make ENABLE_MULTITHREAD="" dlls
+rm -rf dll32
+mkdir dll32
+mv *.dll dll32/
+
+export PATH=/home/mingw64/mingw64/bin:$PATH
+export C_INCLUDE_PATH=/home/mingw64/mingw64/include
+export LIBRARY_PATH=/home/mingw64/mingw64/lib
+make clean
+make ENABLE_MULTITHREAD="" dlls
+rm -rf dll64
+mkdir dll64
+mv *.dll dll64/
#else
#endif
+#include "mafft.h"
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <math.h>
#include <ctype.h>
#include "mtxutl.h"
-#include <float.h>
+//#include <double.h>
+#include <stdarg.h>
#ifdef enablemultithread
#include <pthread.h>
#endif
+#ifndef mingw
+#include <sys/resource.h> // for setstacksize, 2016/Jun
+#endif
-#define VERSION "6.857b"
-#define SHOWVERSION fprintf( stderr, "%s (%s) Version " VERSION " alg=%c, model=%s\n%d thread(s)\n", progName( argv[0] ), (dorp=='d')?"nuc":"aa", alg, modelname, nthread )
+#define VERSION "7.310"
+#define SHOWVERSION reporterr( "%s (%s) Version " VERSION " alg=%c, model=%s, amax=%3.1f\n%d thread(s)\n\n", progName( argv[0] ), (dorp=='d')?"nuc":((nblosum==-2)?"text":"aa"), alg, modelname, specificityconsideration, nthread )
#define FFT_THRESHOLD 80
#define FFT_WINSIZE_P 20
#define DISPSEQF 60
#define DISPSITEI 0
#define MAXITERATION 500
-#define M 50000 /* njob no saidaiti */
+#define M 500000 /* njob no saidaiti */
#define N 5000000 /* nlen no saidaiti */
#define MAXSEG 100000
#define B 256
#define C 60 /* 1 gyou no mojisuu */
#define D 6
+#define DFORMAT "%#6.3f"
#define rnd() ( ( 1.0 / ( RAND_MAX + 1.0 ) ) * rand() )
#define MAX(X,Y) ( ((X)>(Y))?(X):(Y) )
#define MIN(X,Y) ( ((X)<(Y))?(X):(Y) )
#define JTT 201
#define TM 202
-extern char modelname[100];
+extern char modelname[500];
extern int njob, nlenmax;
-extern int amino_n[0x80];
-extern char amino_grp[0x80];
-extern int amino_dis[0x80][0x80];
-extern int amino_disLN[0x80][0x80];
-extern double amino_dis_consweight_multi[0x80][0x80];
-extern int n_dis[26][26];
-extern int n_disFFT[26][26];
-extern float n_dis_consweight_multi[26][26];
-extern char amino[26];
-extern double polarity[20];
-extern double volume[20];
+extern int amino_n[0x100];
+extern char amino_grp[0x100];
+//extern int amino_dis[0x100][0x100];
+extern int **amino_dis;
+extern double **n_disLN;
+//extern double amino_dis_consweight_multi[0x100][0x100];
+extern double **amino_dis_consweight_multi;
+extern int **n_dis;
+extern int **n_disFFT;
+extern double **n_dis_consweight_multi;
+extern unsigned char amino[0x100];
+extern double polarity[0x100];
+extern double volume[0x100];
extern int ribosumdis[37][37];
extern int ppid;
extern int check;
extern double cut;
extern int cooling;
+extern int trywarp;
extern int penalty, ppenalty, penaltyLN;
+extern int penalty_dist, ppenalty_dist;
extern int RNApenalty, RNAppenalty;
extern int RNApenalty_ex, RNAppenalty_ex;
extern int penalty_ex, ppenalty_ex, penalty_exLN;
extern int penalty_EX, ppenalty_EX;
extern int penalty_OP, ppenalty_OP;
+extern int penalty_shift;
+extern double penalty_shift_factor;
extern int offset, poffset, offsetLN, offsetFFT;
extern int RNAthr, RNApthr;
extern int scoremtx;
extern int disopt;
extern int pamN;
extern int checkC;
-extern float geta2;
+extern double geta2;
extern int treemethod;
extern int kimuraR;
extern char *swopt;
extern char *inputfile;
extern char *addfile;
extern int addprofile;
-extern float consweight_multi;
-extern float consweight_rna;
+extern double consweight_multi;
+extern double consweight_rna;
extern char RNAscoremtx;
extern char *signalSM;
extern double score_m_1( char **, int, int, double ** );
extern double score_calc0( char **, int, double **, int );
extern char seqcheck( char ** );
-extern float substitution( char *, char * );
-extern float substitution_score( char *, char * );
-extern float substitution_nid( char *, char * );
-extern float substitution_hosei( char *, char * );
+extern double substitution( char *, char * );
+extern double substitution_score( char *, char * );
+extern double substitution_nid( char *, char * );
+extern double substitution_hosei( char *, char * );
extern double ipower( double, int );
-extern float translate_and_Calign();
-extern float A__align();
-extern float A__align11();
-extern float A__align_gapmap();
-extern float partA__align();
-extern float L__align11();
-extern float G__align11();
-extern float Falign();
-extern float Falign_localhom();
-extern float Conalign();
-extern float Aalign();
-extern float imp_match_out_sc( int, int );
-extern float part_imp_match_out_sc( int, int );
+extern double translate_and_Calign();
+extern double A__align();
+extern double A__align11();
+extern double A__align_gapmap();
+extern double partA__align();
+extern double L__align11( double **scoringmtx, double scoreoffset, char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt );
+extern double G__align11();
+extern double Falign();
+extern double Falign_localhom();
+extern double Conalign();
+extern double Aalign();
+extern double imp_match_out_sc( int, int );
+extern double part_imp_match_out_sc( int, int );
extern void ErrorExit();
extern void cpmx_calc();
extern void intergroup_score( char **, char **, double *, double *, int, int, int, double * );
typedef struct _LocalHom
{
- int nokori;
struct _LocalHom *next;
struct _LocalHom *last;
int start1;
int overlapaa;
int extended;
double importance;
- float fimportance;
- double wimportance;
+ double rimportance;
+// double fimportance;
+// double wimportance;
char korh;
+ int nokori;
} LocalHom;
typedef struct _NodeInCub
typedef struct _Gappattern
{
int len;
- float freq;
+ double freq;
} Gappat;
typedef struct _RNApair
{
int uppos;
- float upscore;
+ double upscore;
int downpos;
- float downscore;
+ double downscore;
int bestpos;
- float bestscore;
+ double bestscore;
} RNApair;
typedef struct _Treedep
int child0;
int child1;
int done;
+ double distfromtip;
} Treedep;
+/*
+ * Addtree: per-item record holding an integer `nearest`, two distances
+ * (`dist1`, `dist2`) and a character buffer pointer `neighbors`.
+ * NOTE(review): presumably describes where a newly added sequence attaches
+ * to an existing tree (nearest member and its distance) -- confirm against
+ * the code that fills this structure; ownership of `neighbors` is not
+ * visible here.
+ */
+typedef struct _Addtree
+{
+ int nearest;
+ double dist1;
+ char *neighbors;
+ double dist2;
+} Addtree;
#include "fft.h"
#include "dp.h"
#define BAATARI2 3
extern int scoreout;
+extern int spscoreout;
extern int outnumber;
-extern char *newgapstr;
+extern int legacygapcost;
+extern double minimumweight;
+extern int nwildcard;
+
+extern TLS char *newgapstr;
+
+extern int nalphabets;
+extern int nscoredalphabets;
+extern double specificityconsideration;
+extern int ndistclass, maxdistclass;
+
+extern int gmsg;
+
+extern double sueff_global;
+extern double lenfaca, lenfacb, lenfacc, lenfacd;
+int maxl, tsize;
#include "mltaln.h"
#define DEBUG 0
+#define CANONICALTREEFORMAT 1
+#define MEMSAVE 1
+
+
#if 0
int seqlen( char *seq )
if( amino_n[(int)(*seq)[i]] == -1 )
{
- fprintf( stderr, "========================================================================= \n" );
- fprintf( stderr, "========================================================================= \n" );
- fprintf( stderr, "=== \n" );
- fprintf( stderr, "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
- fprintf( stderr, "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
- fprintf( stderr, "=== \n" );
- fprintf( stderr, "=== To make an alignment having unusual characters (U, @, #, etc), try\n" );
- fprintf( stderr, "=== %% mafft --anysymbol input > output\n" );
- fprintf( stderr, "=== \n" );
- fprintf( stderr, "========================================================================= \n" );
- fprintf( stderr, "========================================================================= \n" );
+ reporterr( "========================================================================= \n" );
+ reporterr( "========================================================================= \n" );
+ reporterr( "=== \n" );
+ reporterr( "=== Alphabet '%c' is unknown.\n", (*seq)[i] );
+ reporterr( "=== Please check site %d in sequence %d.\n", i+1, (int)(seq-seqbk+1) );
+ reporterr( "=== \n" );
+ reporterr( "=== To make an alignment having unusual characters (U, @, #, etc), try\n" );
+ reporterr( "=== %% mafft --anysymbol input > output\n" );
+ reporterr( "=== \n" );
+ reporterr( "========================================================================= \n" );
+ reporterr( "========================================================================= \n" );
return( (int)(*seq)[i] );
}
}
}
return( 0 );
}
+
+/*
+ * Append the -1-terminated int list s2 to the end of the -1-terminated int
+ * list s1 (the integer analogue of strcat).  s1 must have room for the
+ * combined list plus the terminating -1.
+ */
+void intcat( int *s1, int *s2 )
+{
+    int *dst;
+    int *src;
+
+    /* locate the terminator of the destination list */
+    for( dst=s1; *dst != -1; dst++ )
+        ;
+    /* copy the members of s2, then re-terminate */
+    for( src=s2; *src != -1; src++, dst++ )
+        *dst = *src;
+    *dst = -1;
+}
+
+/*
+ * Copy the -1-terminated int list s2 into s1, including the terminating -1
+ * (the integer analogue of strcpy).  s1 must be large enough to hold it.
+ */
+void intcpy( int *s1, int *s2 )
+{
+    int k = 0;
+
+    while( s2[k] != -1 )
+    {
+        s1[k] = s2[k];
+        k++;
+    }
+    s1[k] = -1;
+}
+
+/*
+ * Copy exactly n ints from s2 to s1, front to back.  No terminator is
+ * looked for or written.
+ */
+void intncpy( int *s1, int *s2, int n )
+{
+    int k;
+
+    /* `!=` rather than `<` on purpose: mirrors the plain count-down of n */
+    for( k=0; k!=n; k++ )
+        s1[k] = s2[k];
+}
+
+/*
+ * Copy exactly n doubles from s2 to s1, front to back.
+ */
+void fltncpy( double *s1, double *s2, int n )
+{
+    int k;
+
+    /* `!=` rather than `<` on purpose: mirrors the plain count-down of n */
+    for( k=0; k!=n; k++ )
+        s1[k] = s2[k];
+}
+
+/*
+ * Return the number of members in the -1-terminated int list s
+ * (the terminator itself is not counted).
+ */
+static int countmem( int *s )
+{
+    int n;
+
+    for( n=0; s[n] != -1; n++ )
+        ;
+    return( n );
+}
+
+/*
+ * Return the last member of the -1-terminated int list s, i.e. the value
+ * stored immediately before the terminator.
+ *
+ * For an empty list (s[0] == -1) the result is -1.  The previous
+ * implementation evaluated *(s-2) after stepping past the terminator, which
+ * for an empty list reads one element before the start of the array.
+ */
+static int lastmem( int *s )
+{
+    int last = -1;
+
+    while( *s != -1 )
+        last = *s++;
+    return( last );
+}
+
-void scmx_calc( int icyc, char **aseq, double *effarr, float **scmx )
+void scmx_calc( int icyc, char **aseq, double *effarr, double **scmx )
{
int i, j, lgth;
lgth = strlen( aseq[0] );
for( j=0; j<lgth; j++ )
{
- for( i=0; i<26; i++ )
+ for( i=0; i<nalphabets; i++ )
{
scmx[i][j] = 0;
}
for( i=0; i<icyc+1; i++ )
{
int id;
- id = amino_n[(int)aseq[i][0]];
- scmx[id][0] += (float)effarr[i];
+ id = amino_n[(unsigned char)aseq[i][0]];
+ scmx[id][0] += (double)effarr[i];
}
for( j=1; j<lgth-1; j++ )
{
for( i=0; i<icyc+1; i++ )
{
int id;
- id = amino_n[(int)aseq[i][j]];
- scmx[id][j] += (float)effarr[i];
+ id = amino_n[(unsigned char)aseq[i][j]];
+ scmx[id][j] += (double)effarr[i];
}
}
for( i=0; i<icyc+1; i++ )
{
int id;
- id = amino_n[(int)aseq[i][lgth-1]];
- scmx[id][lgth-1] += (float)effarr[i];
+ id = amino_n[(unsigned char)aseq[i][lgth-1]];
+ scmx[id][lgth-1] += (double)effarr[i];
}
}
void exitall( char arr[] )
{
- fprintf( stderr, "%s\n", arr );
+ reporterr( "%s\n", arr );
exit( 1 );
}
if( !disp ) return;
if( nseq > DISPSEQF ) imax = DISPSEQF;
else imax = nseq;
- fprintf( stderr, " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
+ reporterr( " ....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+....,....+\n" );
for( i=0; i<+imax; i++ )
{
strncpy( b, seq[i]+DISPSITEI, 120 );
b[120] = 0;
- fprintf( stderr, "%3d %s\n", i+1, b );
- }
-}
-#if 0
-double intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len )
-{
- int i, j, k;
- double score;
- double tmpscore;
- char *mseq1, *mseq2;
- double efficient;
- char xxx[100];
-
-// totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
-// totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
-
- score = 0.0;
- for( i=0; i<clus1; i++ ) for( j=0; j<clus2; j++ )
- {
- efficient = eff1[i] * eff2[j];
- mseq1 = seq1[i];
- mseq2 = seq2[j];
- tmpscore = 0.0;
- for( k=0; k<len; k++ )
- {
- if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
-
- if( mseq1[k] == '-' )
- {
- tmpscore += penalty;
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
- while( mseq1[++k] == '-' )
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
- k--;
- if( k >len-2 ) break;
- continue;
- }
- if( mseq2[k] == '-' )
- {
- tmpscore += penalty;
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
- while( mseq2[++k] == '-' )
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
- k--;
- if( k > len-2 ) break;
- continue;
- }
+ reporterr( "%3d %s\n", i+1, b );
}
- score += (double)tmpscore * efficient;
-#if 1
- sprintf( xxx, "%f", score );
-// fprintf( stderr, "## score in intergroup_score = %f\n", score );
-#endif
- }
-#if 0
- fprintf( stderr, "###score = %f\n", score );
-#endif
-#if 0
- fprintf( stderr, "## score in intergroup_score = %f\n", score );
-#endif
- return( score );
}
-#endif
void intergroup_score_consweight( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
{
int i, j, k;
int len2 = len - 2;
- int ms1, ms2;
+ unsigned char ms1, ms2;
double tmpscore;
char *mseq1, *mseq2;
double efficient;
tmpscore = 0.0;
for( k=0; k<len; k++ )
{
- ms1 = (int)mseq1[k];
- ms2 = (int)mseq2[k];
- if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
+ ms1 = (unsigned char)mseq1[k];
+ ms2 = (unsigned char)mseq2[k];
+ if( ms1 == '-' && ms2 == '-' ) continue;
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
- if( ms1 == (int)'-' )
+ if( ms1 == '-' )
{
tmpscore += (double)penalty;
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
- while( (ms1=(int)mseq1[++k]) == (int)'-' )
+ while( (ms1=(unsigned char)mseq1[++k]) == '-' )
;
// tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
k--;
if( k >len2 ) break;
continue;
}
- if( ms2 == (int)'-' )
+ if( ms2 == '-' )
{
tmpscore += (double)penalty;
tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
- while( (ms2=(int)mseq2[++k]) == (int)'-' )
+ while( (ms2=(unsigned char)mseq2[++k]) == '-' )
;
// tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
k--;
}
}
*value += (double)tmpscore * (double)efficient;
-// fprintf( stderr, "val in _gapnomi = %f\n", *value );
+// reporterr( "val in _gapnomi = %f\n", *value );
}
}
#if 0
fprintf( stdout, "###score = %f\n", score );
#endif
#if DEBUG
- fprintf( stderr, "score in intergroup_score = %f\n", score );
+ reporterr( "score in intergroup_score = %f\n", score );
#endif
// return( score );
}
}
}
*value += (double)tmpscore * (double)efficient;
-// fprintf( stderr, "val in _gapnomi = %f\n", *value );
+// reporterr( "val in _gapnomi = %f\n", *value );
}
}
#if 0
fprintf( stdout, "###score = %f\n", score );
#endif
#if DEBUG
- fprintf( stderr, "score in intergroup_score = %f\n", score );
+ reporterr( "score in intergroup_score = %f\n", score );
#endif
// return( score );
}
-void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
+void intergroup_score_multimtx( int **whichmtx, double ***scoringmatrices, char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
{
- int i, j, k;
+ int i, j, k, c;
int len2 = len - 2;
- int ms1, ms2;
+ int mn1, mn2;
double tmpscore;
char *mseq1, *mseq2;
double efficient;
+ int gapnum = amino_n['-'];
double gaptmpscore;
double gapscore = 0.0;
-// fprintf( stderr, "#### in intergroup_score\n" );
+// reporterr( "#### in intergroup_score\n" );
// totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
// totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
+// reporterr( "\n intergroup_score_multimtx ..." );
*value = 0.0;
for( i=0; i<clus1; i++ )
{
for( j=0; j<clus2; j++ )
{
efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
+ c = whichmtx[i][j];
mseq1 = seq1[i];
mseq2 = seq2[j];
tmpscore = 0.0;
gaptmpscore = 0.0;
for( k=0; k<len; k++ )
{
- ms1 = (int)mseq1[k];
- ms2 = (int)mseq2[k];
- if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
-// tmpscore += (double)amino_dis[ms1][ms2];
- tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
+ mn1 = amino_n[(unsigned char)(mseq1[k])];
+ mn2 = amino_n[(unsigned char)(mseq2[k])];
+ if( mn1 == gapnum && mn2 == gapnum ) continue;
+ tmpscore += (double)scoringmatrices[c][mn1][mn2];
+// tmpscore += (double)scoringmtx[mn1][mn2];
- if( ms1 == (int)'-' )
+ if( mn1 == gapnum )
{
tmpscore += (double)penalty;
gaptmpscore += (double)penalty;
-// tmpscore += (double)amino_dis[ms1][ms2];
- tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
- while( (ms1=(int)mseq1[++k]) == (int)'-' )
-// tmpscore += (double)amino_dis[ms1][ms2];
- tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
+// tmpscore += (double)scoringmtx[mn1][mn2];
+ tmpscore += (double)scoringmatrices[c][mn1][mn2];
+ while( (mn1=amino_n[(unsigned char)mseq1[++k]]) == gapnum )
+ tmpscore += (double)scoringmatrices[c][mn1][mn2];
+// tmpscore += (double)scoringmtx[mn1][mn2];
k--;
if( k >len2 ) break;
continue;
}
- if( ms2 == (int)'-' )
+ if( mn2 == gapnum )
{
tmpscore += (double)penalty;
gaptmpscore += (double)penalty;
-// tmpscore += (double)amino_dis[ms1][ms2];
- tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
- while( (ms2=(int)mseq2[++k]) == (int)'-' )
-// tmpscore += (double)amino_dis[ms1][ms2];
- tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
+ tmpscore += (double)scoringmatrices[c][mn1][mn2];
+// tmpscore += (double)scoringmtx[mn1][mn2];
+ while( (mn2=amino_n[(unsigned char)mseq2[++k]]) == gapnum )
+ tmpscore += (double)scoringmatrices[c][mn1][mn2];
+// tmpscore += (double)scoringmtx[mn1][mn2];
k--;
if( k > len2 ) break;
continue;
gapscore += (double)gaptmpscore * (double)efficient;
}
}
+// reporterr( "done." );
#if 0
- fprintf( stderr, "###gapscore = %f\n", gapscore );
+ reporterr( "###gapscore = %f\n", gapscore );
#endif
#if DEBUG
- fprintf( stderr, "score in intergroup_score = %f\n", score );
+ reporterr( "score in intergroup_score = %f\n", score );
#endif
// return( score );
}
-void intergroup_score_new( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
+void intergroup_score_dynmtx( double **offsetmtx, int scoringmtx[0x80][0x80], char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
{
int i, j, k;
int len2 = len - 2;
int ms1, ms2;
double tmpscore;
char *mseq1, *mseq2;
- static double efficient[1];
+ double efficient;
+
+ double gaptmpscore;
+ double gapscore = 0.0;
+
+// reporterr( "#### in intergroup_score\n" );
// totaleff1 = 0.0; for( i=0; i<clus1; i++ ) totaleff1 += eff1[i];
// totaleff2 = 0.0; for( i=0; i<clus2; i++ ) totaleff2 += eff2[i];
+ reporterr( "\n intergroup_score_dynmtx ..." );
*value = 0.0;
for( i=0; i<clus1; i++ )
{
for( j=0; j<clus2; j++ )
{
- *efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
+ efficient = eff1[i] * eff2[j]; /* \e$B$J$<$+G[Ns$r;H$o$J$$$H$*$+$7$/$J$k\e(B, \e$BB?J,%P%0\e(B */
mseq1 = seq1[i];
mseq2 = seq2[j];
tmpscore = 0.0;
+ gaptmpscore = 0.0;
for( k=0; k<len; k++ )
{
ms1 = (int)mseq1[k];
ms2 = (int)mseq2[k];
if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
- tmpscore += (double)amino_dis[ms1][ms2];
+ tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
+// tmpscore += (double)scoringmtx[ms1][ms2];
if( ms1 == (int)'-' )
{
tmpscore += (double)penalty;
- tmpscore += (double)amino_dis[ms1][ms2];
+ gaptmpscore += (double)penalty;
+// tmpscore += (double)scoringmtx[ms1][ms2];
+ tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;;
while( (ms1=(int)mseq1[++k]) == (int)'-' )
- tmpscore += (double)amino_dis[ms1][ms2];
+ tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
+// tmpscore += (double)scoringmtx[ms1][ms2];
k--;
if( k >len2 ) break;
continue;
if( ms2 == (int)'-' )
{
tmpscore += (double)penalty;
- tmpscore += (double)amino_dis[ms1][ms2];
+ gaptmpscore += (double)penalty;
+ tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
+// tmpscore += (double)scoringmtx[ms1][ms2];
while( (ms2=(int)mseq2[++k]) == (int)'-' )
- tmpscore += (double)amino_dis[ms1][ms2];
+ tmpscore += (double)scoringmtx[ms1][ms2] + offsetmtx[i][j] * 600;
+// tmpscore += (double)scoringmtx[ms1][ms2];
k--;
if( k > len2 ) break;
continue;
}
}
- *value += (double)tmpscore * (double)*efficient;
+ *value += (double)tmpscore * (double)efficient;
+ gapscore += (double)gaptmpscore * (double)efficient;
}
}
+ reporterr( "done." );
#if 0
- fprintf( stdout, "###score = %f\n", score );
+ reporterr( "###gapscore = %f\n", gapscore );
#endif
#if DEBUG
- fprintf( stderr, "score in intergroup_score = %f\n", score );
+ reporterr( "score in intergroup_score = %f\n", score );
#endif
// return( score );
}
+/*
+ * intergroup_score
+ *
+ * Accumulates a weighted pairwise score between every sequence of one group
+ * and every sequence of another group and stores the total in *value.
+ *
+ *   seq1, seq2    aligned sequences of the two groups
+ *   eff1, eff2    per-sequence weights; pair (i,j) is weighted by eff1[i]*eff2[j]
+ *   clus1, clus2  number of sequences in seq1 / seq2
+ *   len           number of alignment columns to scan
+ *   value         output; overwritten (not accumulated into) by this call
+ *
+ * Columns in which both sequences have '-' are skipped.  Every other column
+ * contributes amino_dis_consweight_multi[ms1][ms2].  When a gap run starts in
+ * one sequence, `penalty` is added once and the matrix entry is added again for
+ * the opening column and for each further column of the run.
+ * NOTE(review): while a run is being skipped only the gapped side is re-read;
+ * the other residue keeps its value from the opening column -- confirm intended.
+ *
+ * Reads the globals `penalty` and `amino_dis_consweight_multi`.
+ */
+void intergroup_score( char **seq1, char **seq2, double *eff1, double *eff2, int clus1, int clus2, int len, double *value )
+{
+	int i, j, k;
+	int len2 = len - 2;
+	unsigned char ms1, ms2;	// unsigned so that bytes >= 0x80 cannot become negative indices
+	double tmpscore;
+	char *mseq1, *mseq2;
+	double efficient;
+
+	double gaptmpscore;
+	double gapscore = 0.0;	// penalty-only share of the score; used only by the disabled trace below
+
+	*value = 0.0;
+	for( i=0; i<clus1; i++ )
+	{
+		for( j=0; j<clus2; j++ )
+		{
+			// Historical note (translated): "for some reason this misbehaves unless
+			// an array is used; probably a bug".  It is a plain scalar now.
+			efficient = eff1[i] * eff2[j];
+			mseq1 = seq1[i];
+			mseq2 = seq2[j];
+			tmpscore = 0.0;
+			gaptmpscore = 0.0;
+			for( k=0; k<len; k++ )
+			{
+				ms1 = (unsigned char)mseq1[k];
+				ms2 = (unsigned char)mseq2[k];
+				if( ms1 == '-' && ms2 == '-' ) continue;
+				tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
+
+				if( ms1 == '-' )
+				{
+					tmpscore += (double)penalty;
+					gaptmpscore += (double)penalty;
+					tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
+					// consume the rest of the gap run in sequence 1
+					while( (ms1=(unsigned char)mseq1[++k]) == '-' )
+						tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
+					k--;	// step back: the for-loop increment re-visits the first non-gap column
+					if( k >len2 ) break;
+					continue;
+				}
+				if( ms2 == '-' )
+				{
+					tmpscore += (double)penalty;
+					gaptmpscore += (double)penalty;
+					tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
+					// consume the rest of the gap run in sequence 2
+					while( (ms2=(unsigned char)mseq2[++k]) == '-' )
+						tmpscore += (double)amino_dis_consweight_multi[ms1][ms2];
+					k--;
+					if( k > len2 ) break;
+					continue;
+				}
+			}
+			*value += (double)tmpscore * (double)efficient;
+			gapscore += (double)gaptmpscore * (double)efficient;
+		}
+	}
+#if 0
+	reporterr( "###gapscore = %f\n", gapscore );
+#endif
+#if DEBUG
+	// The result lives in *value; there is no local named `score` in this function.
+	reporterr( "score in intergroup_score = %f\n", *value );
+#endif
+}
double score_calc5( char **seq, int s, double **eff, int ex ) /* method 3 deha nai */
{
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
if( mseq1[k] == '-' )
{
tmpscore += penalty;
while( mseq1[++k] == '-' )
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
{
tmpscore += penalty;
while( mseq2[++k] == '-' )
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
if( mseq1[k] == '-' )
{
tmpscore += penalty;
while( mseq1[++k] == '-' )
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
{
tmpscore += penalty;
while( mseq2[++k] == '-' )
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
k--;
if( k > len-2 ) break;
continue;
}
}
/*
- fprintf( stderr, "score in score_calc5 = %f\n", score );
+ reporterr( "score in score_calc5 = %f\n", score );
*/
return( (double)score );
/*
-fprintf( trap_g, "score by fast = %f\n", (float)score );
+fprintf( trap_g, "score by fast = %f\n", (double)score );
tmpscore = score = 0.0;
for( i=0; i<s; i++ )
for( k=0; k<len; k++ )
{
if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]] + 400 * !scoremtx ;
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]] + 400 * !scoremtx ;
c += efficient;
for( k=0; k<nseq-1; k++ )
{
- float minscore = 9999.0;
+ double minscore = 9999.0;
int im = -1, jm = -1;
int count;
}
}
-static void setnearest( int nseq, Bchain *acpt, float **eff, float *mindisfrompt, int *nearestpt, int pos )
-{
- int j;
- float tmpfloat;
- float **effptpt;
- Bchain *acptj;
+#define BLOCKSIZE 100 // NOTE(review): no use is visible near this definition -- confirm it is still needed
+#define LARGEBLOCKSIZE 100 // number of joblist entries a distance worker claims each time it advances the shared cursor
- *mindisfrompt = 999.9;
- *nearestpt = -1;
+typedef struct _generaltdistarrthread_arg // argument bundle for generalkmerdistarrthread / generalmsadistarrthread
+{
+ int para; // nonzero: the worker locks/unlocks *mutex around accesses to *posshared
+ int njob; // number of valid entries in joblist
+// int thread_no;
+ int m; // index of the reference sequence; every job i is compared against m
+ int *nlen; // k-mer worker only: nlen[m] and nlen[i] are passed to distcompact (presumably sequence lengths -- confirm)
+ char **seq; // MSA worker only: seq[m] and seq[i] are passed to distcompact_msa
+ int **skiptable; // MSA worker only: skiptable[m] and skiptable[i] are passed to distcompact_msa
+ int **pointt; // k-mer worker only: pointt[i] is passed to distcompact
+ int *ttable; // k-mer worker only: table passed to distcompact (presumably built for sequence m -- confirm)
+ int *tselfscore; // tselfscore[m] and tselfscore[i] are passed to the distance function
+ int *posshared; // shared cursor into joblist; each worker advances it by LARGEBLOCKSIZE
+ int *joblist; // sequence indices whose distance to m has to be computed
+ double *result; // output array: result[i] receives the distance for sequence i
+#ifdef enablemultithread
+ pthread_mutex_t *mutex; // guards *posshared when para is nonzero
+#endif
+} generaldistarrthread_arg_t;
-// if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
+static void *generalkmerdistarrthread( void *arg ) /* Worker that fills result[i] with distcompact() values between sequence m and every sequence i listed in joblist.
+ * arg points to a generaldistarrthread_arg_t.  Jobs are claimed from the shared cursor *posshared
+ * in chunks of LARGEBLOCKSIZE; when para is nonzero the cursor is protected by targ->mutex.
+ * Always returns NULL.  Also used (called directly) when multithreading is disabled.
+ */
+{
+ generaldistarrthread_arg_t *targ = (generaldistarrthread_arg_t *)arg;
+ int njob = targ->njob;
+ int para = targ->para;
+ int m = targ->m;
+ int *nlen = targ->nlen;
+ int **pointt = targ->pointt;
+ int *ttable = targ->ttable;
+ int *tselfscore = targ->tselfscore;
+ int *joblist = targ->joblist;
+ int *posshared = targ->posshared;
+ double *result = targ->result;
+// double **partmtx = targ->partmtx;
+ int i, posinjoblist, n;
+
+// for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+
+ while( 1 )
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_lock( targ->mutex );
+#endif
+ if( *posshared >= njob ) // everything claimed; ">=" because the cursor advances in whole blocks
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+ commonsextet_p( NULL, NULL ); // NOTE(review): presumably releases buffers held inside commonsextet_p -- confirm
+ return( NULL );
+ }
+ posinjoblist = *posshared; // first job of the chunk claimed by this worker
+ *posshared += LARGEBLOCKSIZE; // hand the following chunk to the next claimant
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
-// for( j=pos+1; j<nseq; j++ )
- for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
- {
- j = acptj->pos;
-// if( (tmpfloat=*effpt++) < *mindisfrompt )
- if( (tmpfloat=eff[pos][j-pos]) < *mindisfrompt )
- {
- *mindisfrompt = tmpfloat;
- *nearestpt = j;
+ for( n=0; n<LARGEBLOCKSIZE&&posinjoblist<njob; n++ ) // process the chunk, stopping early at the end of joblist
+ {
+ i = joblist[posinjoblist++];
+
+// if( i == m ) continue; // not needed
+
+ result[i] = distcompact( nlen[m], nlen[i], ttable, pointt[i], tselfscore[m], tselfscore[i] );
+
+ }
 }
- }
- effptpt = eff;
-// for( j=0; j<pos; j++ )
- for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
- {
- j = acptj->pos;
-// if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
- if( (tmpfloat=eff[j][pos-j]) < *mindisfrompt )
- {
- *mindisfrompt = tmpfloat;
- *nearestpt = j;
+}
+
+static void *generalmsadistarrthread( void *arg ) /* Worker that fills result[i] with distcompact_msa() values between sequence m and every sequence i listed in joblist.
+ * arg points to a generaldistarrthread_arg_t.  Jobs are claimed from the shared cursor *posshared
+ * in chunks of LARGEBLOCKSIZE; when para is nonzero the cursor is protected by targ->mutex.
+ * Always returns NULL.  Also used (called directly) when multithreading is disabled.
+ */
+{
+ generaldistarrthread_arg_t *targ = (generaldistarrthread_arg_t *)arg;
+ int njob = targ->njob;
+ int para = targ->para;
+ int m = targ->m;
+ int *tselfscore = targ->tselfscore;
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ int *joblist = targ->joblist;
+ int *posshared = targ->posshared;
+ double *result = targ->result;
+// double **partmtx = targ->partmtx;
+ int i, posinjoblist, n;
+
+// for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+
+ while( 1 )
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_lock( targ->mutex );
+#endif
+ if( *posshared >= njob ) // everything claimed; ">=" because the cursor advances in whole blocks
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+ return( NULL );
+ }
+ posinjoblist = *posshared; // first job of the chunk claimed by this worker
+ *posshared += LARGEBLOCKSIZE; // hand the following chunk to the next claimant
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+
+ for( n=0; n<LARGEBLOCKSIZE&&posinjoblist<njob; n++ ) // process the chunk, stopping early at the end of joblist
+ {
+ i = joblist[posinjoblist++];
+
+// if( i == m ) continue; // not needed
+
+ result[i] = distcompact_msa( seq[m], seq[i], skiptable[m], skiptable[i], tselfscore[m], tselfscore[i] );
+
+ }
 }
- }
 }
-static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
+#if 1
+static void kmerresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, int *tselfscore, int **pointt, int *nlen, int *singlettable1, double *result, int *joblist )
{
- int j;
- double tmpfloat;
- double **effptpt;
+ int i, j;
+ double tmpdouble;
+ double mindisfrom;
+ int nearest;
+// double **effptpt;
Bchain *acptj;
+// double *result;
+// int *joblist;
- *mindisfrompt = 999.9;
- *nearestpt = -1;
+ mindisfrom = 999.9;
+ nearest = -1;
-// if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
-// for( j=pos+1; j<nseq; j++ )
- for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
+// reporterr( "resetnearest..\r" );
+// printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt );
+
+// mindisfrom = 999.9;
+// nearest = -1;
+
+
+// result = calloc( nseq, sizeof( double ) );
+// joblist = calloc( nseq, sizeof( int ) );
+
+
+ for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru
{
- j = acptj->pos;
-// if( (tmpfloat=*effpt++) < *mindisfrompt )
- if( (tmpfloat=eff[pos][j]) < *mindisfrompt )
+ i = acptj->pos;
+// if( i == pos ) continue;
+
+ if( distfrompt[pos] )
{
- *mindisfrompt = tmpfloat;
- *nearestpt = j;
+ tmpdouble = result[i] = distfrompt[pos][i];
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = i;
+ }
}
- }
- effptpt = eff;
-// for( j=0; j<pos; j++ )
- for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
- {
- j = acptj->pos;
-// if( (tmpfloat=(*effptpt++)[pos-j]) < *mindisfrompt )
- if( (tmpfloat=eff[j][pos]) < *mindisfrompt )
+ else if( distfrompt[i] )
{
- *mindisfrompt = tmpfloat;
- *nearestpt = j;
+ tmpdouble = result[i] = distfrompt[i][pos];
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = i;
+ }
}
+ else
+ joblist[j++] = i;
}
-}
-
+ for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru
+ {
+ i = acptj->pos;
+// if( i == pos ) continue;
-static void loadtreeoneline( int *ar, float *len, FILE *fp )
-{
- static char gett[1000];
+ if( distfrompt[pos] )
+ {
+ tmpdouble = result[i] = distfrompt[pos][i];
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = i;
+ }
+ }
+ else if( distfrompt[i] )
+ {
+ tmpdouble = result[i] = distfrompt[i][pos];
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = i;
+ }
+ }
+ else
+ joblist[j++] = i;
+ }
- fgets( gett, 999, fp );
-// fprintf( stderr, "gett=%s\n", gett );
+ if( j )
+ {
+// reporterr( "resetting in parallel!! j=%d\n", j );
+// exit( 1 );
+ int posshared;
+ generaldistarrthread_arg_t *targ;
+#ifdef enablemultithread
+ if( nthread )
+ {
+ pthread_t *handle;
+ pthread_mutex_t mutex;
- sscanf( gett, "%d %d %f %f", ar, ar+1, len, len+1 );
-
- ar[0]--;
- ar[1]--;
-
- if( ar[0] >= ar[1] )
- {
- fprintf( stderr, "Incorrect guide tree\n" );
- exit( 1 );
+ targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ posshared = 0;
+ pthread_mutex_init( &mutex, NULL );
+ for( i=0; i<nthread; i++ )
+ {
+ targ[i].para = 1;
+ targ[i].njob = j;
+ targ[i].m = pos;
+ targ[i].tselfscore = tselfscore;
+ targ[i].nlen = nlen;
+ targ[i].pointt = pointt;
+ targ[i].ttable = singlettable1;
+ targ[i].joblist = joblist;
+ targ[i].result = result;
+ targ[i].posshared = &posshared;
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, generalkmerdistarrthread, (void *)(targ+i) );
+ }
+
+ for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
+ pthread_mutex_destroy( &mutex );
+ free( handle );
+ }
+ else
+#endif
+ {
+ targ = calloc( 1, sizeof( generaldistarrthread_arg_t ) );
+ posshared = 0;
+ {
+ targ[0].para = 0;
+ targ[0].njob = j;
+ targ[0].m = pos;
+ targ[0].tselfscore = tselfscore;
+ targ[0].nlen = nlen;
+ targ[0].pointt = pointt;
+ targ[0].ttable = singlettable1;
+ targ[0].joblist = joblist;
+ targ[0].result = result;
+ targ[0].posshared = &posshared;
+
+ generalkmerdistarrthread( targ );
+ }
+ }
+ free( targ );
+// sukoshi muda
+ for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru
+ {
+ j = acptj->pos;
+ tmpdouble = result[j];
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = j;
+ }
+ }
+
+ for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru
+ {
+ j = acptj->pos;
+ tmpdouble = result[j];
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = j;
+ }
+ }
}
-// fprintf( stderr, "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] );
-// fprintf( stderr, "len[0] = %f, len[1] = %f\n", len[0], len[1] );
+ *mindisfrompt = mindisfrom;
+ *nearestpt = nearest;
+
+// free( joblist );
+// free( result );
}
-void loadtree( int nseq, int ***topol, float **len, char **name, int *nlen, Treedep *dep )
+#else
+static void kmerresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, int *tselfscore, int **pointt, int *nlen, int *singlettable1, double *resultnotused, int *joblistnotused )
{
- int i, j, k, miniim, maxiim, minijm, maxijm;
- int *intpt, *intpt2;
- static int *hist = NULL;
- static Bchain *ac = NULL;
- int im = -1, jm = -1;
- Bchain *acjmnext, *acjmprev;
- int prevnode;
- Bchain *acpti;
- int *pt1, *pt2, *pt11, *pt22;
- static int *nmemar;
- int nmemim, nmemjm;
- float minscore;
- int *nearest = NULL; // by D.Mathog, a guess
- float *mindisfrom = NULL; // by D.Mathog, a guess
- static char **tree;
- static char *treetmp;
- static char *nametmp;
- FILE *fp;
- int node[2];
+ int j;
+ double tmpdouble;
+ double mindisfrom;
+ int nearest;
+// double **effptpt;
+ Bchain *acptj;
- fp = fopen( "_guidetree", "r" );
- if( !fp )
- {
- fprintf( stderr, "cannot open _guidetree\n" );
- exit( 1 );
- }
+ mindisfrom = 999.9;
+ nearest = -1;
- if( !hist )
- {
- hist = AllocateIntVec( njob );
- ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
- nmemar = AllocateIntVec( njob );
- mindisfrom = AllocateFloatVec( njob );
- nearest = AllocateIntVec( njob );
- treetmp = AllocateCharVec( njob*50 );
- nametmp = AllocateCharVec( 31 );
- tree = AllocateCharMtx( njob, njob*50 );
- }
-
- for( i=0; i<nseq; i++ )
+// reporterr( "resetnearest..\r" );
+// printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt );
+
+// mindisfrom = 999.9;
+// nearest = -1;
+
+
+ for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru
{
- for( j=0; j<30; j++ ) nametmp[j] = 0;
- for( j=0; j<30; j++ )
+ j = acptj->pos;
+
+ if( distfrompt[pos] )
+ tmpdouble=distfrompt[pos][j];
+ else if( distfrompt[j] )
+ tmpdouble=distfrompt[j][pos];
+// else if( seq )
+// tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] );
+ else
+ tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] );
+
+
+ if( tmpdouble < mindisfrom )
{
- if( isalnum( name[i][j] ) )
- nametmp[j] = name[i][j];
- else
- nametmp[j] = '_';
+ mindisfrom = tmpdouble;
+ nearest = j;
}
- nametmp[30] = 0;
-// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
- sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
}
- for( i=0; i<nseq; i++ )
+
+ for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru
{
- ac[i].next = ac+i+1;
- ac[i].prev = ac+i-1;
- ac[i].pos = i;
- }
- ac[nseq-1].next = NULL;
+ j = acptj->pos;
+ if( distfrompt[pos] )
+ tmpdouble=distfrompt[pos][j];
+ else if( distfrompt[j] )
+ tmpdouble=distfrompt[j][pos];
+// else if( seq )
+// tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] );
+ else
+ tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] );
+
- for( i=0; i<nseq; i++ )
- {
- hist[i] = -1;
- nmemar[i] = 1;
- }
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
-#if 0
- minscore = 999.9;
- for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
- {
- i = acpti->pos;
-// fprintf( stderr, "k=%d i=%d\n", k, i );
- if( mindisfrom[i] < minscore ) // muscle
- {
- im = i;
- minscore = mindisfrom[i];
- }
- }
- jm = nearest[im];
- if( jm < im )
+ if( tmpdouble < mindisfrom )
{
- j=jm; jm=im; im=j;
+ mindisfrom = tmpdouble;
+ nearest = j;
}
-#else
- minscore = 0.0;
- len[k][0] = len[k][1] = -1.0;
- loadtreeoneline( node, len[k], fp );
- im = node[0];
- jm = node[1];
+ }
+// printf( "mindisfrom = %f\n", mindisfrom );
- if( len[k][0] == -1.0 || len[k][1] == -1.0 )
- {
- fprintf( stderr, "\n\nERROR: Branch length is not given.\n" );
- exit( 1 );
- }
+ *mindisfrompt = mindisfrom;
+ *nearestpt = nearest;
+}
+#endif
- if( len[k][0] < 0.0 ) len[k][0] = 0.0;
- if( len[k][1] < 0.0 ) len[k][1] = 0.0;
+#if 1
+static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *result, int *joblist )
+{
+ int i, j;
+ double tmpdouble;
+ double mindisfrom;
+ int nearest;
+// double **effptpt;
+ Bchain *acptj;
+// double *result;
+// int *joblist;
-#endif
+ mindisfrom = 999.9;
+ nearest = -1;
- prevnode = hist[im];
- if( dep ) dep[k].child0 = prevnode;
- nmemim = nmemar[im];
-// fprintf( stderr, "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
+// reporterr( "resetnearest..\r" );
+// printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt );
- intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
- if( prevnode == -1 )
+// mindisfrom = 999.9;
+// nearest = -1;
+
+
+// result = calloc( nseq, sizeof( double ) );
+// joblist = calloc( nseq, sizeof( int ) );
+
+// for( acptj=acpt,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru
+ for( acptj=(acpt+pos)->next,j=0; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru
+ {
+ i = acptj->pos;
+// if( i == pos ) continue;
+
+ if( distfrompt[pos] )
{
- *intpt++ = im;
- *intpt = -1;
+ tmpdouble = result[i] = distfrompt[pos][i];
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = i;
+ }
+ }
+ else if( distfrompt[i] )
+ {
+ tmpdouble = result[i] = distfrompt[i][pos];
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = i;
+ }
}
else
+ joblist[j++] = i;
+ }
+
+ for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru
+ {
+ i = acptj->pos;
+// if( i == pos ) continue;
+
+ if( distfrompt[pos] )
{
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
+ tmpdouble = result[i] = distfrompt[pos][i];
+ if( tmpdouble < mindisfrom )
{
- pt11 = pt2;
- pt22 = pt1;
+ mindisfrom = tmpdouble;
+ nearest = i;
}
- else
+ }
+ else if( distfrompt[i] )
+ {
+ tmpdouble = result[i] = distfrompt[i][pos];
+ if( tmpdouble < mindisfrom )
{
- pt11 = pt1;
- pt22 = pt2;
+ mindisfrom = tmpdouble;
+ nearest = i;
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
}
+ else
+ joblist[j++] = i;
+ }
- nmemjm = nmemar[jm];
- prevnode = hist[jm];
- if( dep ) dep[k].child1 = prevnode;
-
-// fprintf( stderr, "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
+ if( j )
+ {
+// reporterr( "resetting in parallel!! j=%d\r", j );
+// exit( 1 );
+ int posshared;
+ generaldistarrthread_arg_t *targ;
+ posshared = 0;
- intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
- if( !intpt )
+#ifdef enablemultithread
+ if( nthread )
{
- fprintf( stderr, "Cannot reallocate topol\n" );
- exit( 1 );
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+ targ = calloc( nthread, sizeof( generaldistarrthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+ for( i=0; i<nthread; i++ )
+ {
+ targ[i].para = 1;
+ targ[i].njob = j;
+ targ[i].m = pos;
+ targ[i].tselfscore = tselfscore;
+ targ[i].seq = seq;
+ targ[i].skiptable = skiptable;
+ targ[i].joblist = joblist;
+ targ[i].result = result;
+ targ[i].posshared = &posshared;
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, generalmsadistarrthread, (void *)(targ+i) );
+ }
+ for( j=0; j<nthread; j++ ) pthread_join( handle[j], NULL );
+ pthread_mutex_destroy( &mutex );
+ free( handle );
}
- if( prevnode == -1 )
+ else
+#endif
{
- *intpt++ = jm;
- *intpt = -1;
+ targ = calloc( 1, sizeof( generaldistarrthread_arg_t ) );
+ {
+ targ[0].para = 0;
+ targ[0].njob = j;
+ targ[0].m = pos;
+ targ[0].tselfscore = tselfscore;
+ targ[0].seq = seq;
+ targ[0].skiptable = skiptable;
+ targ[0].joblist = joblist;
+ targ[0].result = result;
+ targ[0].posshared = &posshared;
+
+ generalmsadistarrthread( targ );
+ }
}
- else
+ free( targ );
+// sukoshi muda
+ for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru
{
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
+ j = acptj->pos;
+ tmpdouble = result[j];
+ if( tmpdouble < mindisfrom )
{
- pt11 = pt2;
- pt22 = pt1;
+ mindisfrom = tmpdouble;
+ nearest = j;
}
- else
+ }
+
+ for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru
+ {
+ j = acptj->pos;
+ tmpdouble = result[j];
+ if( tmpdouble < mindisfrom )
{
- pt11 = pt1;
- pt22 = pt2;
+ mindisfrom = tmpdouble;
+ nearest = j;
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
}
- minscore *= 0.5;
+ }
-// len[k][0] = ( minscore - tmptmplen[im] );
-// len[k][1] = ( minscore - tmptmplen[jm] );
-// len[k][0] = -1;
-// len[k][1] = -1;
+// printf( "mindisfrom = %f\n", mindisfrom );
+ *mindisfrompt = mindisfrom;
+ *nearestpt = nearest;
- hist[im] = k;
- nmemar[im] = nmemim + nmemjm;
+// free( joblist );
+// free( result );
+}
+#else
+static void msaresetnearest( int nseq, Bchain *acpt, double **distfrompt, double *mindisfrompt, int *nearestpt, int pos, char **seq, int **skiptable, int *tselfscore, double *resultnotused, int *joblistnotused )
+{
+ int j;
+ double tmpdouble;
+ double mindisfrom;
+ int nearest;
+// double **effptpt;
+ Bchain *acptj;
- mindisfrom[im] = 999.9;
- for( acpti=ac; acpti!=NULL; acpti=acpti->next )
- {
- i = acpti->pos;
- if( i != im && i != jm )
- {
- if( i < im )
- {
- miniim = i;
- maxiim = im;
- minijm = i;
- maxijm = jm;
- }
- else if( i < jm )
- {
- miniim = im;
- maxiim = i;
- minijm = i;
- maxijm = jm;
- }
- else
- {
- miniim = im;
- maxiim = i;
- minijm = jm;
- maxijm = i;
- }
- }
- }
+ mindisfrom = 999.9;
+ nearest = -1;
- sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
- strcpy( tree[im], treetmp );
-// fprintf( stderr, "im,jm=%d,%d\n", im, jm );
- acjmprev = ac[jm].prev;
- acjmnext = ac[jm].next;
- acjmprev->next = acjmnext;
- if( acjmnext != NULL )
- acjmnext->prev = acjmprev;
-// free( (void *)eff[jm] ); eff[jm] = NULL;
+// reporterr( "resetnearest..\r" );
+// printf( "[%d], %f, dist=%d ->", pos, *distfrompt, *nearestpt );
-#if 0 // muscle seems to miss this.
- for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+// mindisfrom = 999.9;
+// nearest = -1;
+
+
+ for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // setnearest ni awaseru
+ {
+ j = acptj->pos;
+
+ if( distfrompt[pos] )
+ tmpdouble=distfrompt[pos][j];
+ else if( distfrompt[j] )
+ tmpdouble=distfrompt[j][pos];
+ else
+ tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] );
+// else
+// tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] );
+
+
+ if( tmpdouble < mindisfrom )
{
- i = acpti->pos;
- if( nearest[i] == im )
- {
-// fprintf( stderr, "calling setnearest\n" );
-// setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
- }
+ mindisfrom = tmpdouble;
+ nearest = j;
}
-#endif
+ }
+ for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // setnearest ni awaseru
+ {
+ j = acptj->pos;
-#if 0
- fprintf( stdout, "vSTEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
- fprintf( stdout, "\n" );
-#endif
- }
- fclose( fp );
- fp = fopen( "infile.tree", "w" );
- fprintf( fp, "%s\n", treetmp );
- fprintf( fp, "#by loadtree\n" );
- fclose( fp );
+ if( distfrompt[pos] )
+ tmpdouble=distfrompt[pos][j];
+ else if( distfrompt[j] )
+ tmpdouble=distfrompt[j][pos];
+ else
+ tmpdouble=distcompact_msa( seq[pos], seq[j], skiptable[pos], skiptable[j], tselfscore[pos], tselfscore[j] );
+// else
+// tmpdouble=distcompact( nlen[pos], nlen[j], singlettable1, pointt[j], tselfscore[pos], tselfscore[j] );
+
- FreeCharMtx( tree );
- free( treetmp );
- free( nametmp );
- free( hist ); hist = NULL;
- free( (char *)ac ); ac = NULL;
- free( (void *)nmemar ); nmemar = NULL;
- free( mindisfrom );
- free( nearest );
+ if( tmpdouble < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = j;
+ }
+ }
+// printf( "mindisfrom = %f\n", mindisfrom );
+ *mindisfrompt = mindisfrom;
+ *nearestpt = nearest;
}
+#endif
-static float sueff1, sueff05;
-static double sueff1_double, sueff05_double;
-
-static float cluster_mix_float( float d1, float d2 )
-{
- return( MIN( d1, d2 ) * sueff1 + ( d1 + d2 ) * sueff05 );
-}
-static float cluster_average_float( float d1, float d2 )
-{
- return( ( d1 + d2 ) * 0.5 );
-}
-static float cluster_minimum_float( float d1, float d2 )
-{
- return( MIN( d1, d2 ) );
-}
-static double cluster_mix_double( double d1, double d2 )
+static void setnearest( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos ) /* Finds the chain member with the smallest distance to node pos.
+ * acpt is used both as the head of the linked chain and as an array indexed by position (acpt+pos).
+ * eff is read as eff[a][b-a] with a < b, i.e. only the offset from the smaller index is stored per row.
+ * On return *mindisfrompt holds the smallest distance found and *nearestpt its position;
+ * if no member is closer than 999.9 they are left at 999.9 and -1.  nseq is not used.
+ */
 {
- return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double );
+ int j;
+ double tmpdouble;
+ double mindisfrom;
+ int nearest;
+// double **effptpt;
+ Bchain *acptj;
+
+ mindisfrom = 999.9; // sentinel: only distances below this value can be selected
+ nearest = -1;
+
+// printf( "[%d], %f, dist=%d ->", pos, *mindisfrompt, *nearestpt );
+
+// if( (acpt+pos)->next ) effpt = eff[pos]+(acpt+pos)->next->pos-pos;
+
+// for( j=pos+1; j<nseq; j++ )
+ for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next ) // members that follow pos in the chain
+ {
+ j = acptj->pos;
+// if( (tmpdouble=*effpt++) < *mindisfrompt )
+ if( (tmpdouble=eff[pos][j-pos]) < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = j;
+ }
+ }
+// effptpt = eff;
+// for( j=0; j<pos; j++ )
+ for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next ) // members from the head of the chain up to, not including, pos
+ {
+ j = acptj->pos;
+// if( (tmpdouble=(*effptpt++)[pos-j]) < *mindisfrompt )
+ if( (tmpdouble=eff[j][pos-j]) < mindisfrom )
+ {
+ mindisfrom = tmpdouble;
+ nearest = j;
+ }
+ }
+
+ *mindisfrompt = mindisfrom; // outputs are written once, after both scans
+ *nearestpt = nearest;
+// printf( "%f, %d \n", pos, *mindisfrompt, *nearestpt );
 }
-static double cluster_average_double( double d1, double d2 )
+
+/*
+ * setnearest_double_fullmtx
+ *
+ * Full-matrix counterpart of setnearest(): finds the chain member with the
+ * smallest distance to node pos.
+ *
+ *   nseq          not used
+ *   acpt          head of the linked chain, also indexed by position (acpt+pos)
+ *   eff           distances, read as eff[pos][j] for members after pos in the
+ *                 chain and eff[j][pos] for members before it
+ *   mindisfrompt  output: smallest distance found (999.9 if none is smaller)
+ *   nearestpt     output: position of that member (-1 if none)
+ *   pos           position of the node whose nearest neighbour is wanted
+ *
+ * The outputs are updated in place while scanning.
+ */
+static void setnearest_double_fullmtx( int nseq, Bchain *acpt, double **eff, double *mindisfrompt, int *nearestpt, int pos )
 {
- return( ( d1 + d2 ) * 0.5 );
+	int j;
+	double tmpdouble;
+	Bchain *acptj;
+
+	*mindisfrompt = 999.9;	// sentinel: only distances below this value can be selected
+	*nearestpt = -1;
+
+	// members that follow pos in the chain
+	for( acptj=(acpt+pos)->next; acptj!=NULL; acptj=acptj->next )
+	{
+		j = acptj->pos;
+		if( (tmpdouble=eff[pos][j]) < *mindisfrompt )
+		{
+			*mindisfrompt = tmpdouble;
+			*nearestpt = j;
+		}
+	}
+	// members from the head of the chain up to, not including, pos
+	for( acptj=acpt; (acptj&&acptj->pos!=pos); acptj=acptj->next )
+	{
+		j = acptj->pos;
+		if( (tmpdouble=eff[j][pos]) < *mindisfrompt )
+		{
+			*mindisfrompt = tmpdouble;
+			*nearestpt = j;
+		}
+	}
 }
-static double cluster_minimum_double( double d1, double d2 )
+
+
+
+static void loadtreeoneline( int *ar, double *len, FILE *fp ) /* Reads one line of a guide-tree file from fp: two node numbers followed by two branch lengths.
+ * ar[0], ar[1] receive the node numbers converted to 0-based; len[0], len[1] receive the lengths.
+ * Any read error, malformed line, or pair with ar[0] >= ar[1] is reported through reporterr and ends the program with exit( 1 ).
+ * Uses a static line buffer, so the function is not reentrant.
+ */
 {
- return( MIN( d1, d2 ) );
+ static char gett[1000];
+ int res;
+ char *p;
+
+ p = fgets( gett, 999, fp );
+ if( p == NULL ) // end of file or read error: the tree has fewer lines than expected
+ {
+ reporterr( "\n\nFormat error (1) in the tree? It has to be a bifurcated and rooted tree.\n" );
+ reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
+ exit( 1 );
+ }
+
+
+ res = sscanf( gett, "%d %d %lf %lf", ar, ar+1, len, len+1 );
+ if( res != 4 ) // all four fields are required
+ {
+ reporterr( "\n\nFormat error (2) in the tree? It has to be a bifurcated and rooted tree.\n" );
+ reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
+ exit( 1 );
+ }
+
+ ar[0]--; // the file numbers nodes from 1; convert to 0-based
+ ar[1]--;
+
+ if( ar[0] >= ar[1] ) // the first node number must be strictly smaller than the second
+ {
+ reporterr( "\n\nIncorrect guide tree\n" );
+ reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
+ exit( 1 );
+ }
+
+
+// reporterr( "ar[0] = %d, ar[1] = %d\n", ar[0], ar[1] );
+// reporterr( "len[0] = %f, len[1] = %f\n", len[0], len[1] );
 }
-void loadtop( int nseq, float **eff, int ***topol, float **len ) // computes branch length BUG!!
+void loadtop( int nseq, double **mtx, int ***topol, double **len, char **name, int *nlen, Treedep *dep )
{
- int i, k, miniim, maxiim, minijm, maxijm;
+ int i, j, k, minijm, maxijm;
int *intpt, *intpt2;
- static Bchain *ac = NULL;
- float eff1, eff0;
- static float *tmptmplen = NULL;
- static int *hist = NULL;
+ int *hist = NULL;
+ Bchain *ac = NULL;
int im = -1, jm = -1;
Bchain *acjmnext, *acjmprev;
int prevnode;
- Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
- static int *nmemar;
+ int *nmemar;
int nmemim, nmemjm;
- float minscore;
- static char **tree;
- static char *treetmp;
+ char **tree;
+ char *treetmp;
+ char *nametmp, *nameptr, *tmpptr;
+ char namec;
FILE *fp;
int node[2];
- float dumfl[2];
- float (*clusterfuncpt[1])(float,float);
-
-
- sueff1 = 1 - SUEFF;
- sueff05 = SUEFF * 0.5;
- if ( treemethod == 'X' )
- clusterfuncpt[0] = cluster_mix_float;
- else if ( treemethod == 'E' )
- clusterfuncpt[0] = cluster_average_float;
- else if ( treemethod == 'q' )
- clusterfuncpt[0] = cluster_minimum_float;
- else
- {
- fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
- exit( 1 );
- }
+ double *height;
+ double clusterdist;
+ int mpair, mi, mj;
fp = fopen( "_guidetree", "r" );
if( !fp )
{
- fprintf( stderr, "cannot open _guidetree\n" );
+ reporterr( "cannot open _guidetree\n" );
exit( 1 );
}
if( !hist )
{
- treetmp = AllocateCharVec( njob*50 );
- tree = AllocateCharMtx( njob, njob*50 );
- hist = AllocateIntVec( njob );
- tmptmplen = AllocateFloatVec( njob );
- ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
- nmemar = AllocateIntVec( njob );
+ hist = AllocateIntVec( nseq );
+ ac = (Bchain *)malloc( nseq * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( nseq );
+// treetmp = AllocateCharVec( nseq*50 );
+ treetmp = NULL;
+ nametmp = AllocateCharVec( 1000 ); // nagasugi
+// tree = AllocateCharMtx( nseq, nseq*50 );
+ tree = AllocateCharMtx( nseq, 0 );
+ height = AllocateFloatVec( nseq );
}
-
- for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
+
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
+ {
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
+
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
+ if( tree[i] == NULL )
+ {
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
+ }
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
+ }
+
+
for( i=0; i<nseq; i++ )
{
ac[i].next = ac+i+1;
}
ac[nseq-1].next = NULL;
- for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
+
for( i=0; i<nseq; i++ )
{
hist[i] = -1;
nmemar[i] = 1;
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
for( k=0; k<nseq-1; k++ )
{
- if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
-
+ if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
#if 0
- minscore = 99999.9;
+ minscore = 999.9;
for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
- effpt = eff[i=acpti->pos];
-// i = acpti->pos;
- for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
- {
-// j=acptj->pos;
-// tmpfloat = eff[i][j-i];
-// if( tmpfloat < minscore )
- if( (tmpfloat= effpt[(j=acptj->pos)-i]) < minscore )
- {
- minscore = tmpfloat;
- im = i; jm = j;
- }
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ if( mindisfrom[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindisfrom[i];
}
}
-
-// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
+ jm = nearest[im];
+ if( jm < im )
+ {
+ j=jm; jm=im; im=j;
+ }
#else
- dumfl[0] = dumfl[1] = -1.0;
- loadtreeoneline( node, dumfl, fp );
+ len[k][0] = len[k][1] = -1.0;
+ loadtreeoneline( node, len[k], fp );
im = node[0];
jm = node[1];
- minscore = eff[im][jm-im];
-
-// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
-
- if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
+ if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL )
{
- fprintf( stderr, "\n\nERROR: Branch length should not be given.\n" );
+ reporterr( "\n\nCheck the guide tree.\n" );
+ reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
+ reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
exit( 1 );
}
#endif
-
prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
nmemim = nmemar[im];
+
+// reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
+
intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
{
*intpt = -1;
}
- prevnode = hist[jm];
+
nmemjm = nmemar[jm];
+ prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
+
+// reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
+
intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
if( !intpt )
{
- fprintf( stderr, "Cannot reallocate topol\n" );
+ reporterr( "Cannot reallocate topol\n" );
exit( 1 );
}
if( prevnode == -1 )
*intpt = -1;
}
- minscore *= 0.5;
-
- len[k][0] = ( minscore - tmptmplen[im] );
- len[k][1] = ( minscore - tmptmplen[jm] );
- if( len[k][0] < 0.0 ) len[k][0] = 0.0;
- if( len[k][1] < 0.0 ) len[k][1] = 0.0;
+// len[k][0] = ( minscore - tmptmplen[im] );
+// len[k][1] = ( minscore - tmptmplen[jm] );
+// len[k][0] = -1;
+// len[k][1] = -1;
- tmptmplen[im] = minscore;
hist[im] = k;
nmemar[im] = nmemim + nmemjm;
- for( acpti=ac; acpti!=NULL; acpti=acpti->next )
- {
- i = acpti->pos;
- if( i != im && i != jm )
- {
- if( i < im )
- {
- miniim = i;
- maxiim = im;
- minijm = i;
- maxijm = jm;
- }
- else if( i < jm )
- {
- miniim = im;
- maxiim = i;
- minijm = i;
- maxijm = jm;
- }
- else
- {
- miniim = im;
- maxiim = i;
- minijm = jm;
- maxijm = i;
- }
- eff0 = eff[miniim][maxiim-miniim];
- eff1 = eff[minijm][maxijm-minijm];
+
+ if( len[k][0] == -1 || len[k][1] == -1 )
+ {
+ reporterr( "Re-computing the length of branch %d..\n", k );
+ clusterdist = 0.0;
+ mpair = 0;
+ for( i=0; (mi=topol[k][0][i])>-1; i++ ) for( j=0; (mj=topol[k][1][j])>-1; j++ )
+ {
+ minijm = MIN(mi,mj);
+ maxijm = MAX(mi,mj);
+ clusterdist += mtx[minijm][maxijm-minijm];
+ mpair += 1;
+ }
+ clusterdist /= (double)mpair;
+ reporterr( "clusterdist = %f\n", clusterdist );
+ if( len[k][0] == -1 ) len[k][0] = clusterdist/2.0 - height[im];
+ if( len[k][1] == -1 ) len[k][1] = clusterdist/2.0 - height[im];
+
+ fprintf( stderr, "len0 = %f\n", len[k][0] );
+ fprintf( stderr, "len1 = %f\n\n", len[k][1] );
+ }
+
#if 0
- eff[miniim][maxiim-miniim] =
- MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
-#else
- eff[miniim][maxiim-miniim] =
- (clusterfuncpt[0])( eff0, eff1 );
+ fprintf( stderr, "vSTEP-%03d:\n", k+1 );
+ fprintf( stderr, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 );
+ fprintf( stderr, "\n" );
+
#endif
- }
- }
-// sprintf( treetmp, "(%s,%s)", tree[im], tree[jm] );
+ height[im] += len[k][0]; // for ig tree, 2015/Dec/25
+ dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25
+// reporterr( "##### dep[%d].distfromtip = %f\n", k, height[im] );
+
+
+
+ treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
+ if( !treetmp )
+ {
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
strcpy( tree[im], treetmp );
+// reporterr( "im,jm=%d,%d\n", im, jm );
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
acjmprev->next = acjmnext;
if( acjmnext != NULL )
acjmnext->prev = acjmprev;
- free( (void *)eff[jm] ); eff[jm] = NULL;
-#if 0
- fprintf( stdout, "vSTEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
- fprintf( stdout, "\n" );
+// free( (void *)eff[jm] ); eff[jm] = NULL;
+
+#if 0 // muscle seems to miss this.
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+ if( nearest[i] == im )
+ {
+// reporterr( "calling setnearest\n" );
+// setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ }
+ }
#endif
+
+
}
-#if 1
fclose( fp );
fp = fopen( "infile.tree", "w" );
fprintf( fp, "%s\n", treetmp );
- fprintf( fp, "by loadtop\n" );
+ fprintf( fp, "#by loadtop\n" );
fclose( fp );
-#endif
- free( (void *)tmptmplen ); tmptmplen = NULL;
- free( hist ); hist = NULL;
- free( (char *)ac ); ac = NULL;
- free( (void *)nmemar ); nmemar = NULL;
+
+ FreeCharMtx( tree );
+ free( treetmp );
+ free( nametmp );
+ free( hist );
+ free( (char *)ac );
+ free( (void *)nmemar );
+ free( height );
}
-void fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( int nseq, float **eff, int ***topol, float **len, char **name, int *nlen, Treedep *dep )
+void stringshuffle( int *ary, int size )
{
- int i, j, k, miniim, maxiim, minijm, maxijm;
- int *intpt, *intpt2;
- float tmpfloat;
- float eff1, eff0;
- static float *tmptmplen = NULL;
- static int *hist = NULL;
- static Bchain *ac = NULL;
- int im = -1, jm = -1;
- Bchain *acjmnext, *acjmprev;
- int prevnode;
- Bchain *acpti;
- int *pt1, *pt2, *pt11, *pt22;
- static int *nmemar;
- int nmemim, nmemjm;
- float minscore;
- int *nearest = NULL; // by D.Mathog, a guess
- float *mindisfrom = NULL; // by D.Mathog, a guess
- static char **tree;
- static char *treetmp;
- static char *nametmp, *nameptr, *tmpptr;
- FILE *fp;
- float (*clusterfuncpt[1])(float,float);
+ int i;
+ for(i=0;i<size;i++)
+ {
+ int j = rand()%size;
+ int t = ary[i];
+ ary[i] = ary[j];
+ ary[j] = t;
+ }
+}
+void topolorder( int nseq, int *order, int *posinorder, int ***topol, Treedep *dep, int pos, int nchild )
+{
+ int *str;
+ int child0, child1;
+ child0 = dep[pos].child0;
+ child1 = dep[pos].child1;
- sueff1 = 1 - SUEFF;
- sueff05 = SUEFF * 0.5;
- if ( treemethod == 'X' )
- clusterfuncpt[0] = cluster_mix_float;
- else if ( treemethod == 'E' )
- clusterfuncpt[0] = cluster_average_float;
- else if ( treemethod == 'q' )
- clusterfuncpt[0] = cluster_minimum_float;
- else
- {
- fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
- exit( 1 );
- }
+// int i;
- if( !hist )
- {
- hist = AllocateIntVec( njob );
- tmptmplen = AllocateFloatVec( njob );
- ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
- nmemar = AllocateIntVec( njob );
- mindisfrom = AllocateFloatVec( njob );
- nearest = AllocateIntVec( njob );
- treetmp = AllocateCharVec( njob*150 );
- nametmp = AllocateCharVec( 130 );
- tree = AllocateCharMtx( njob, njob*150 );
- }
-
- for( i=0; i<nseq; i++ )
+ if( nchild == 0 || nchild == 2 )
{
- for( j=0; j<130; j++ ) nametmp[j] = 0;
- for( j=0; j<130; j++ )
+ if( child0 == -1 )
{
- if( name[i][j] == 0 )
- break;
- else if( isalnum( name[i][j] ) )
- nametmp[j] = name[i][j];
- else
- nametmp[j] = '_';
- }
- nametmp[129] = 0;
-// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
- if( outnumber )
- nameptr = strstr( nametmp, "_numo_e" ) + 8;
- else
- nameptr = nametmp + 1;
+ str = calloc( 2, sizeof( int ) );
+ str[0] = topol[pos][0][0]; // kanarazu memsave format nara, tanjunka dekiru.
+ str[1] = -1;
- if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame
- sprintf( tree[i], "%d_%.60s", i+1, nameptr );
- }
- for( i=0; i<nseq; i++ )
- {
- ac[i].next = ac+i+1;
- ac[i].prev = ac+i-1;
- ac[i].pos = i;
- }
- ac[nseq-1].next = NULL;
+// for( i=0; order[i]!=-1; i++ )
+// ;
+// reporterr( "0: i=%d, *posinorder=%d\n", i, *posinorder );
- for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
+ intcpy( order+*posinorder, str );
+// intcat( order, str );
- for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
- for( i=0; i<nseq; i++ )
- {
- hist[i] = -1;
- nmemar[i] = 1;
- }
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
+ *posinorder += 1;
- minscore = 999.9;
- for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
- {
- i = acpti->pos;
-// fprintf( stderr, "k=%d i=%d\n", k, i );
- if( mindisfrom[i] < minscore ) // muscle
- {
- im = i;
- minscore = mindisfrom[i];
- }
+ free( str );
}
- jm = nearest[im];
- if( jm < im )
+ else
{
- j=jm; jm=im; im=j;
+ topolorder( nseq, order, posinorder, topol, dep, child0, 2 );
}
+ }
- prevnode = hist[im];
- if( dep ) dep[k].child0 = prevnode;
- nmemim = nmemar[im];
- intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
- if( prevnode == -1 )
+ if( nchild == 1 || nchild == 2 )
+ {
+ if( child1 == -1 )
{
- *intpt++ = im;
- *intpt = -1;
+ str = calloc( 2, sizeof( int ) );
+ str[0] = topol[pos][1][0]; // kanarazu memsave format nara, tanjunka dekiru.
+ str[1] = -1;
+
+
+// for( i=0; order[i]!=-1; i++ )
+// ;
+// reporterr( "1: i=%d, *posinorder=%d\n", i, *posinorder );
+
+ intcpy( order+*posinorder, str );
+// intcat( order, str );
+
+
+ *posinorder += 1;
+ free( str );
}
else
{
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
+ topolorder( nseq, order, posinorder, topol, dep, child1, 2 );
+ }
+ }
+// return( posinorder );
+}
+
+#if CANONICALTREEFORMAT
+void createchain( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int shuffle, int seed )
+{
+ FILE *fp;
+ int i, j;
+ double l, ll;
+ int treelen;
+ char **tree;
+ char *instanttree;
+ int posinit;
+// char *treetmp, *tt;
+ char *nametmp, *nameptr, *tmpptr;
+ char namec;
+ int *order;
+ int im, jm, mm;
+
+ if( treeout )
+ {
+// treetmp = NULL;
+ nametmp = AllocateCharVec( 1000 ); // nagasugi
+ tree = AllocateCharMtx( nseq, 0 );
+
+ treelen = nseq;
+ for( i=0; i<nseq; i++ )
+ {
+
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
{
- pt11 = pt2;
- pt22 = pt1;
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
}
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
+
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
+ if( tree[i] == NULL )
{
- pt11 = pt1;
- pt22 = pt2;
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
+ treelen += strlen( tree[i] ) + 20;
+
}
- prevnode = hist[jm];
- if( dep ) dep[k].child1 = prevnode;
- nmemjm = nmemar[jm];
- intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
- if( !intpt )
+ instanttree = calloc( treelen, sizeof( char ) );
+ posinit = 0;
+ for( i=0; i<nseq-1; i++ )
{
- fprintf( stderr, "Cannot reallocate topol\n" );
- exit( 1 );
+ instanttree[i] = '(';
+ posinit++;
}
- if( prevnode == -1 )
+
+ }
+
+
+ order = calloc( nseq, sizeof( int ) );
+ for( i=0; i<nseq; i++ ) order[i] = i;
+
+ srand( seed );
+ if( shuffle ) stringshuffle( order, nseq );
+
+ ll = l = 2.0 / nseq;
+
+ im = order[0];
+ jm = order[1];
+
+ topol[0][0] = (int *)realloc( topol[0][0], ( 2 ) * sizeof( int ) );
+ topol[0][1] = (int *)realloc( topol[0][1], ( 2 ) * sizeof( int ) );
+ if( im < jm )
+ {
+ topol[0][0][0] = im;
+ topol[0][0][1] = -1;
+ topol[0][1][0] = jm;
+ topol[0][1][1] = -1;
+ mm = im;
+ }
+ else
+ {
+ topol[0][0][0] = jm;
+ topol[0][0][1] = -1;
+ topol[0][1][0] = im;
+ topol[0][1][1] = -1;
+ mm = jm;
+ }
+ len[0][0] = len[0][1] = l;
+ dep[0].child1 = -1;
+ dep[0].child0 = -1;
+ dep[0].distfromtip = l;
+ ll += l;
+
+ if( treeout )
+ {
+ posinit += sprintf( instanttree+posinit, "%s:%7.5f,", tree[im], len[0][0] );
+// reporterr( "instanttree = %s\n", instanttree );
+ }
+
+ for( i=1; i<nseq-1; i++ )
+ {
+ im = order[i];
+ jm = order[i+1];
+
+ if( mm < jm )
{
- *intpt++ = jm;
- *intpt = -1;
+#if MEMSAVE
+ topol[i][0] = (int *)realloc( topol[i][0], ( 2 ) * sizeof( int ) );
+ topol[i][0][0] = mm;
+ topol[i][0][1] = -1;
+#else
+ topol[i][0] = (int *)realloc( topol[i][0], ( i + 2 ) * sizeof( int ) );
+ intcpy( topol[i][0], topol[i-1][0] );
+ intcat( topol[i][0], topol[i-1][1] );
+#endif
+ topol[i][1] = (int *)realloc( topol[i][1], ( 2 ) * sizeof( int ) );
+ topol[i][1][0] = jm;
+ topol[i][1][1] = -1;
+
+// reporterr( "step %d\n", i );
+// for( j=0; topol[i][0][j]!=-1; j++ ) reporterr( "%5d ", topol[i][0][j] );
+// reporterr( "\n", i );
+// for( j=0; topol[i][1][j]!=-1; j++ ) reporterr( "%5d ", topol[i][1][j] );
+// reporterr( "\n\n", i );
+//
+ len[i][0] = l;
+ len[i][1] = ll;
+
+ if( dep )
+ {
+ dep[i].child0 = i-1;
+ dep[i].child1 = -1;
+ dep[i].distfromtip = ll;
+ }
}
else
{
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
+
+#if MEMSAVE
+ topol[i][1] = (int *)realloc( topol[i][1], ( 2 ) * sizeof( int ) );
+ topol[i][1][0] = mm;
+ topol[i][1][1] = -1;
+#else
+ topol[i][1] = (int *)realloc( topol[i][1], ( i + 2 ) * sizeof( int ) );
+ intcpy( topol[i][1], topol[i-1][0] );
+ intcat( topol[i][1], topol[i-1][1] );
+#endif
+ topol[i][0] = (int *)realloc( topol[i][0], ( 2 ) * sizeof( int ) );
+ topol[i][0][0] = jm;
+ topol[i][0][1] = -1;
+
+ mm = jm;
+
+// reporterr( "step %d\n", i );
+// for( j=0; topol[i][0][j]!=-1; j++ ) reporterr( "%5d ", topol[i][0][j] );
+// reporterr( "\n", i );
+// for( j=0; topol[i][1][j]!=-1; j++ ) reporterr( "%5d ", topol[i][1][j] );
+// reporterr( "\n\n", i );
+//
+
+ len[i][1] = l;
+ len[i][0] = ll;
+
+ if( dep )
{
- pt11 = pt2;
- pt22 = pt1;
+ dep[i].child1 = i-1;
+ dep[i].child0 = -1;
+ dep[i].distfromtip = ll;
}
- else
+ }
+
+ if( treeout )
+ {
+ posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], ll-l, l );
+// reporterr( "instanttree (in loop) = %s\n", instanttree );
+#if 0
+ if( i % 1000 == 0 ) reporterr( "\r%d/%d", i, nseq );
+// reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) );
+// reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) );
+// reporterr( "treetmp = %p\n", treetmp );
+ tt = realloc( treetmp, ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
+ if( tt == NULL )
{
- pt11 = pt1;
- pt22 = pt2;
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
+ treetmp = tt;
+// reporterr( "i=%d\n", i );
+// reporterr( "part1=%s\n", tree[0] );
+// reporterr( "part2=%s\n", tree[i+1] );
+// reporterr( "size = %d, %d\n", strlen( tree[0] ), strlen( tree[i+1] ) );
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[i][0], tree[jm], len[i][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[jm] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[im] = NULL;
+ if( tree[jm] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
+ strcpy( tree[jm], treetmp );
+#endif
}
+ ll += l;
+ }
+ if( treeout )
+ {
+ posinit += sprintf( instanttree+posinit, "%s:%7.5f)", tree[jm], ll-l );
+ fp = fopen( "infile.tree", "w" );
+// fprintf( fp, "%s;\n", treetmp );
+// fprintf( fp, "#by createchain\n" );
+ fprintf( fp, "%s;\n", instanttree );
+ fclose( fp );
+ FreeCharMtx( tree );
+ free( nametmp );
+ free( instanttree );
+ }
- minscore *= 0.5;
+ fp = fopen( "_guidetree", "w" );
+ if( !fp )
+ {
+ reporterr( "cannot open _guidetree\n" );
+ exit( 1 );
+ }
+ for( i=0; i<nseq-1; i++ )
+ fprintf( fp, "%d %d %f %f\n", topol[i][0][0]+1, topol[i][1][0]+1, len[i][0], len[i][1] );
+ fclose( fp );
- len[k][0] = ( minscore - tmptmplen[im] );
- len[k][1] = ( minscore - tmptmplen[jm] );
+ free( order );
- tmptmplen[im] = minscore;
+}
+#else
+void createchain( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int shuffle, int seed )
+{
+ FILE *fp;
+ int i, j;
+ double l, ll;
+ int treelen;
+ char **tree;
+ char *instanttree;
+ int posinit;
+// char *treetmp, *tt;
+ char *nametmp, *nameptr, *tmpptr;
+ char namec;
+ int *order;
+ int im, jm;
+
+ if( treeout )
+ {
+// treetmp = NULL;
+ nametmp = AllocateCharVec( 1000 ); // nagasugi
+ tree = AllocateCharMtx( nseq, 0 );
+
+ treelen = nseq;
+ for( i=0; i<nseq; i++ )
+ {
+
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
+ {
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
+
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
+ if( tree[i] == NULL )
+ {
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
+ }
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
+ treelen += strlen( tree[i] ) + 20;
- hist[im] = k;
- nmemar[im] = nmemim + nmemjm;
+ }
- mindisfrom[im] = 999.9;
- for( acpti=ac; acpti!=NULL; acpti=acpti->next )
- {
- i = acpti->pos;
- if( i != im && i != jm )
- {
- if( i < im )
- {
- miniim = i;
- maxiim = im;
- minijm = i;
- maxijm = jm;
- }
- else if( i < jm )
- {
- miniim = im;
- maxiim = i;
- minijm = i;
- maxijm = jm;
- }
- else
- {
- miniim = im;
- maxiim = i;
- minijm = jm;
- maxijm = i;
- }
- eff0 = eff[miniim][maxiim-miniim];
- eff1 = eff[minijm][maxijm-minijm];
-#if 0
- tmpfloat = eff[miniim][maxiim-miniim] =
- MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
-#else
- tmpfloat = eff[miniim][maxiim-miniim] =
- (clusterfuncpt[0])( eff0, eff1 );
-#endif
- if( tmpfloat < mindisfrom[i] )
- {
- mindisfrom[i] = tmpfloat;
- nearest[i] = im;
- }
- if( tmpfloat < mindisfrom[im] )
- {
- mindisfrom[im] = tmpfloat;
- nearest[im] = i;
- }
- if( nearest[i] == jm )
- {
- nearest[i] = im;
- }
- }
- }
+ instanttree = calloc( treelen, sizeof( char ) );
+ posinit = 0;
+ for( i=0; i<nseq-1; i++ )
+ {
+ instanttree[i] = '(';
+ posinit++;
+ }
- sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
- strcpy( tree[im], treetmp );
+ }
- acjmprev = ac[jm].prev;
- acjmnext = ac[jm].next;
- acjmprev->next = acjmnext;
- if( acjmnext != NULL )
- acjmnext->prev = acjmprev;
- free( (void *)eff[jm] ); eff[jm] = NULL;
-#if 1 // muscle seems to miss this.
- for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ order = calloc( nseq, sizeof( int ) );
+ for( i=0; i<nseq; i++ ) order[i] = i;
+
+ srand( seed );
+ if( shuffle ) stringshuffle( order, nseq );
+
+
+ ll = l = 2.0 / nseq;
+
+ for( i=0; i<nseq-1; i++ )
+ {
+ im = order[i];
+ jm = order[i+1];
+
+ topol[i][0] = (int *)realloc( topol[i][0], ( i + 2 ) * sizeof( int ) );
+ topol[i][1] = (int *)realloc( topol[i][1], ( 2 ) * sizeof( int ) );
+
+ for( j=0; j<i; j++ )
+ topol[i][0][j] = order[j];
+ topol[i][0][i] = im;
+ topol[i][0][i+1] = -1;
+
+ topol[i][1][0] = jm;
+ topol[i][1][1] = -1;
+
+ len[i][0] = l;
+ len[i][1] = ll;
+ ll += l;
+
+ if( dep )
{
- i = acpti->pos;
- if( nearest[i] == im )
+ dep[i].child0 = i-1;
+ dep[i].child1 = -1;
+ dep[i].distfromtip = ll;
+ }
+
+ if( treeout )
+ {
+ if( i == 0 )
{
-// fprintf( stderr, "calling setnearest\n" );
- setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ posinit += sprintf( instanttree+posinit, "%s:%7.5f,", tree[im], len[i][0] );
+// reporterr( "instanttree = %s\n", instanttree );
}
- }
+ else if ( i == nseq-2 )
+ {
+ posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], len[i-1][1], len[i-1][0] );
+ posinit += sprintf( instanttree+posinit, "%s:%7.5f)", tree[jm], len[i][1] );
+ }
+ else
+ {
+ posinit += sprintf( instanttree+posinit, "%s:%7.5f):%7.5f,", tree[im], len[i-1][1], len[i-1][0] );
+// reporterr( "instanttree (in loop) = %s\n", instanttree );
+ }
+#if 0
+ if( i % 1000 == 0 ) reporterr( "\r%d/%d", i, nseq );
+// reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) );
+// reporterr( "size = %d\n", ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) );
+// reporterr( "treetmp = %p\n", treetmp );
+ tt = realloc( treetmp, ( strlen( tree[im] ) + strlen( tree[jm] ) + 100 ) * sizeof( char ) ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
+ if( tt == NULL )
+ {
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
+ treetmp = tt;
+// reporterr( "i=%d\n", i );
+// reporterr( "part1=%s\n", tree[0] );
+// reporterr( "part2=%s\n", tree[i+1] );
+// reporterr( "size = %d, %d\n", strlen( tree[0] ), strlen( tree[i+1] ) );
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[i][0], tree[jm], len[i][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[jm] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[im] = NULL;
+ if( tree[jm] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
+ strcpy( tree[jm], treetmp );
#endif
+ }
+ }
+ if( treeout )
+ {
+ fp = fopen( "infile.tree", "w" );
+// fprintf( fp, "%s;\n", treetmp );
+// fprintf( fp, "#by createchain\n" );
+ fprintf( fp, "%s;\n", instanttree );
+ fclose( fp );
+ FreeCharMtx( tree );
+ free( nametmp );
+ free( instanttree );
+ }
+ fp = fopen( "_guidetree", "w" );
+ if( !fp )
+ {
+ reporterr( "cannot open _guidetree\n" );
+ exit( 1 );
+ }
+#if CANONICALTREEFORMAT
+ for( i=0; i<nseq-1; i++ )
+ fprintf( fp, "%d %d %f %f\n", topol[i][0][0]+1, topol[i][1][0]+1, len[i][0], len[i][1] );
+#else
+ k = topol[0][0][0];
+ for( i=0; i<nseq-1; i++ )
+ {
+ jm = topol[i][1][0];
-#if 0
- fprintf( stdout, "vSTEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
- fprintf( stdout, "\n" );
+ if( jm > k )
+ {
+ fprintf( fp, "%d %d %f %f\n", k+1, jm+1, len[i][0], len[i][1] );
+ }
+ else
+ {
+ fprintf( fp, "%d %d %f %f\n", jm+1, k+1, len[i][1], len[i][0] );
+ k = jm;
+ }
+ }
#endif
- }
- fp = fopen( "infile.tree", "w" );
- fprintf( fp, "%s\n", treetmp );
fclose( fp );
-
- FreeCharMtx( tree );
- free( treetmp );
- free( nametmp );
- free( (void *)tmptmplen ); tmptmplen = NULL;
- free( hist ); hist = NULL;
- free( (char *)ac ); ac = NULL;
- free( (void *)nmemar ); nmemar = NULL;
- free( mindisfrom );
- free( nearest );
-
+ free( order );
}
+#endif
-//void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name )
-void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name )
+void loadtree( int nseq, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
- double tmpfloat;
- double eff1, eff0;
- static double *tmptmplen = NULL;
- static int *hist = NULL;
- static Bchain *ac = NULL;
+ int *hist = NULL;
+ Bchain *ac = NULL;
int im = -1, jm = -1;
Bchain *acjmnext, *acjmprev;
int prevnode;
Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
- static int *nmemar;
+ int *nmemar;
int nmemim, nmemjm;
- double minscore;
- int *nearest = NULL; // by D.Mathog, a guess
- double *mindisfrom = NULL; // by D.Mathog, a guess
- static char **tree;
- static char *treetmp;
- static char *nametmp, *nameptr, *tmpptr;
+ char **tree;
+ char *treetmp;
+ char *nametmp, *nameptr, *tmpptr;
+ char namec;
FILE *fp;
- double (*clusterfuncpt[1])(double,double);
-
+ int node[2];
+ double *height;
- sueff1_double = 1 - SUEFF;
- sueff05_double = SUEFF * 0.5;
- if ( treemethod == 'X' )
- clusterfuncpt[0] = cluster_mix_double;
- else if ( treemethod == 'E' )
- clusterfuncpt[0] = cluster_average_double;
- else if ( treemethod == 'q' )
- clusterfuncpt[0] = cluster_minimum_double;
- else
+ fp = fopen( "_guidetree", "r" );
+ if( !fp )
{
- fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
+ reporterr( "cannot open _guidetree\n" );
exit( 1 );
}
+
+ reporterr( "Loading a tree\n" );
+
if( !hist )
{
- hist = AllocateIntVec( njob );
- tmptmplen = AllocateDoubleVec( njob );
- ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
- nmemar = AllocateIntVec( njob );
- mindisfrom = AllocateDoubleVec( njob );
- nearest = AllocateIntVec( njob );
- treetmp = AllocateCharVec( njob*150 );
- nametmp = AllocateCharVec( 91 );
- tree = AllocateCharMtx( njob, njob*150 );
+ hist = AllocateIntVec( nseq );
+ ac = (Bchain *)malloc( nseq * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( nseq );
+// treetmp = AllocateCharVec( nseq*50 );
+ if( dep ) height = AllocateFloatVec( nseq );
}
-
- for( i=0; i<nseq; i++ )
+ if( treeout )
{
- for( j=0; j<90; j++ ) nametmp[j] = 0;
- for( j=0; j<90; j++ )
+ treetmp = NULL;
+ nametmp = AllocateCharVec( 1000 ); // nagasugi
+// tree = AllocateCharMtx( nseq, nseq*50 );
+ tree = AllocateCharMtx( nseq, 0 );
+
+ for( i=0; i<nseq; i++ )
{
- if( name[i][j] == 0 )
- break;
- else if( isalnum( name[i][j] ) )
- nametmp[j] = name[i][j];
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
+ {
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
else
- nametmp[j] = '_';
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
+
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
+ if( tree[i] == NULL )
+ {
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
+ }
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
}
- nametmp[90] = 0;
-// sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
- if( outnumber )
- nameptr = strstr( nametmp, "_numo_e" ) + 8;
- else
- nameptr = nametmp + 1;
-
- if( (tmpptr=strstr( nameptr, "_oripos__" )) ) nameptr = tmpptr + 9; // = -> _ no tame
- sprintf( tree[i], "%d_%.60s", i+1, nameptr );
}
+
for( i=0; i<nseq; i++ )
{
ac[i].next = ac+i+1;
}
ac[nseq-1].next = NULL;
- for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
- for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ )
{
hist[i] = -1;
nmemar[i] = 1;
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
for( k=0; k<nseq-1; k++ )
{
- if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
-
+ if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
+#if 0
minscore = 999.9;
for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
i = acpti->pos;
-// fprintf( stderr, "k=%d i=%d\n", k, i );
+// reporterr( "k=%d i=%d\n", k, i );
if( mindisfrom[i] < minscore ) // muscle
{
im = i;
{
j=jm; jm=im; im=j;
}
+#else
+ len[k][0] = len[k][1] = -1.0;
+ loadtreeoneline( node, len[k], fp );
+ im = node[0];
+ jm = node[1];
+
+// if( im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL )
+ if( im > nseq-1 || jm > nseq-1 )
+ {
+ reporterr( "\n\nCheck the guide tree.\n" );
+ reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
+ reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
+ exit( 1 );
+ }
+
+
+ if( len[k][0] == -1.0 || len[k][1] == -1.0 )
+ {
+ reporterr( "\n\nERROR: Branch length is not given.\n" );
+ exit( 1 );
+ }
+ if( len[k][0] < 0.0 ) len[k][0] = 0.0;
+ if( len[k][1] < 0.0 ) len[k][1] = 0.0;
+
+
+#endif
prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
nmemim = nmemar[im];
-// intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
- intpt = topol[k][0];
+
+// reporterr( "prevnode = %d, nmemim = %d\n", prevnode, nmemim );
+
+ intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
- prevnode = hist[jm];
+
nmemjm = nmemar[jm];
-// intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
- intpt = topol[k][1];
+ prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
+
+// reporterr( "prevnode = %d, nmemjm = %d\n", prevnode, nmemjm );
+
+ intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
+ if( !intpt )
+ {
+ reporterr( "Cannot reallocate topol\n" );
+ exit( 1 );
+ }
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt = -1;
}
- minscore *= 0.5;
- len[k][0] = ( minscore - tmptmplen[im] );
- len[k][1] = ( minscore - tmptmplen[jm] );
+// len[k][0] = ( minscore - tmptmplen[im] );
+// len[k][1] = ( minscore - tmptmplen[jm] );
+// len[k][0] = -1;
+// len[k][1] = -1;
- tmptmplen[im] = minscore;
-
hist[im] = k;
nmemar[im] = nmemim + nmemjm;
- mindisfrom[im] = 999.9;
+// mindisfrom[im] = 999.9;
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
minijm = jm;
maxijm = i;
}
- eff0 = eff[miniim][maxiim];
- eff1 = eff[minijm][maxijm];
-#if 0
- tmpfloat = eff[miniim][maxiim] =
- MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
-#else
- tmpfloat = eff[miniim][maxiim] =
- (clusterfuncpt[0])( eff0, eff1 );
-#endif
- if( tmpfloat < mindisfrom[i] )
- {
- mindisfrom[i] = tmpfloat;
- nearest[i] = im;
- }
- if( tmpfloat < mindisfrom[im] )
- {
- mindisfrom[im] = tmpfloat;
- nearest[im] = i;
- }
- if( nearest[i] == jm )
- {
- nearest[i] = im;
- }
}
}
- sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
- strcpy( tree[im], treetmp );
+ if( treeout )
+ {
+ treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
+ if( !treetmp )
+ {
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
+ strcpy( tree[im], treetmp );
+ }
+
+// reporterr( "im,jm=%d,%d\n", im, jm );
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
acjmprev->next = acjmnext;
acjmnext->prev = acjmprev;
// free( (void *)eff[jm] ); eff[jm] = NULL;
-#if 1 // muscle seems to miss this.
+#if 0 // muscle seems to miss this.
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
if( nearest[i] == im )
{
-// fprintf( stderr, "calling setnearest\n" );
- setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+// reporterr( "calling setnearest\n" );
+// setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
}
}
#endif
#if 0
- fprintf( stdout, "vSTEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
- fprintf( stdout, "\n" );
+ fprintf( stderr, "vSTEP-%03d:\n", k+1 );
+ fprintf( stderr, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i]+1 );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i]+1 );
+ fprintf( stderr, "\n" );
#endif
+
+ if( dep )
+ {
+ height[im] += len[k][0]; // for ig tree, 2015/Dec/25
+ dep[k].distfromtip = height[im]; // for ig tree, 2015/Dec/25
+// reporterr( "##### dep[%d].distfromtip = %f\n\n", k, height[im] );
+ }
+
+// reporterr( "dep[%d].child0 = %d\n", k, dep[k].child0 );
+// reporterr( "dep[%d].child1 = %d\n", k, dep[k].child1 );
+// reporterr( "dep[%d].distfromtip = %f\n", k, dep[k].distfromtip );
}
- fp = fopen( "infile.tree", "w" );
- fprintf( fp, "%s\n", treetmp );
fclose( fp );
- FreeCharMtx( tree );
- free( treetmp );
- free( nametmp );
- free( (void *)tmptmplen ); tmptmplen = NULL;
- free( hist ); hist = NULL;
- free( (char *)ac ); ac = NULL;
- free( (void *)nmemar ); nmemar = NULL;
- free( mindisfrom );
- free( nearest );
+ if( treeout )
+ {
+ fp = fopen( "infile.tree", "w" );
+ fprintf( fp, "%s;\n", treetmp );
+ fprintf( fp, "#by loadtree\n" );
+ fclose( fp );
+ FreeCharMtx( tree );
+ free( treetmp );
+ free( nametmp );
+ }
+
+ free( hist );
+ free( (char *)ac );
+ free( (void *)nmemar );
+ if( dep ) free( height );
+
+}
+
+// Read the first line of the file "_guidetree" and report which guide-tree
+// mode it requests.
+//   seed     : out; preset to 0, overwritten by the integer that follows
+//              "shuffle", "auto" or "test".
+//   npick    : out; preset to 200, overwritten by the second integer that
+//              follows "auto" or "test" (the program exits if it is < 2).
+//   limitram : out; preset to 10e9 (10GB); for "compact<size><unit>" it is
+//              set to size * unit, unit letter k/M/G/T in either case.
+// Returns 's' (shuffle), 'p' (pileup), 'a' (auto), 't' (test), 'c' (compact),
+// 'C' (very compact) or 'l' (anything else; reported as "loadtree.").
+// Exits when the file cannot be opened or the arguments are unusable.
+int check_guidetreefile( int *seed, int *npick, double *limitram )
+{
+	char string[100];
+	char *sizestring;
+	FILE *fp;
+	double tanni; // multiplier selected by the unit letter
+	double tmpd;
+
+	*seed = 0;
+	*npick = 200;
+	*limitram = 10.0 * 1000 * 1000 * 1000; // 10GB
+	fp = fopen( "_guidetree", "r" );
+	if( !fp )
+	{
+		reporterr( "cannot open _guidetree\n" );
+		exit( 1 );
+	}
+
+	// Bound the read by the real buffer size (999 was passed for a 100-byte
+	// buffer) and treat an empty or unreadable file as an empty line instead
+	// of parsing uninitialised memory.
+	if( fgets( string, sizeof( string ), fp ) == NULL ) string[0] = 0;
+	fclose( fp );
+
+	if( !strncmp( string, "shuffle", 7 ) )
+	{
+		sscanf( string+7, "%d", seed );
+		reporterr( "shuffle, seed=%d\n", *seed );
+		return( 's' );
+	}
+	else if( !strncmp( string, "pileup", 6 ) )
+	{
+		reporterr( "pileup.\n" );
+		return( 'p' );
+	}
+	else if( !strncmp( string, "auto", 4 ) )
+	{
+		sscanf( string+4, "%d %d", seed, npick );
+		reporterr( "auto, seed=%d, npick=%d\n", *seed, *npick );
+		if( *npick < 2 )
+		{
+			reporterr( "Check npick\n" );
+			exit( 1 );
+		}
+		return( 'a' );
+	}
+	else if( !strncmp( string, "test", 4 ) )
+	{
+		sscanf( string+4, "%d %d", seed, npick );
+		reporterr( "calc, seed=%d, npick=%d\n", *seed, *npick );
+		if( *npick < 2 )
+		{
+			reporterr( "Check npick\n" );
+			exit( 1 );
+		}
+		return( 't' );
+	}
+	else if( !strncmp( string, "compact", 7 ) )
+	{
+		sizestring = string + 7;
+		reporterr( "sizestring = %s\n", sizestring );
+		// The kB test used to look for 'k' twice; accept both cases like the other units.
+		if( strchr( sizestring, 'K' ) || strchr( sizestring, 'k' ) ) tanni = 1.0 * 1000; // kB
+		else if( strchr( sizestring, 'M' ) || strchr( sizestring, 'm' ) ) tanni = 1.0 * 1000 * 1000; // MB
+		else if( strchr( sizestring, 'G' ) || strchr( sizestring, 'g' ) ) tanni = 1.0 * 1000 * 1000 * 1000; // GB
+		else if( strchr( sizestring, 'T' ) || strchr( sizestring, 't' ) ) tanni = 1.0 * 1000 * 1000 * 1000 * 1000; // TB
+		else
+		{
+			reporterr( "\nSpecify initial ram usage by '--initialramusage xGB'\n\n\n" );
+			exit( 1 );
+		}
+		// Without a leading number tmpd would stay uninitialised; reject it
+		// with the same hint as a missing unit.
+		if( sscanf( sizestring, "%lf", &tmpd ) != 1 )
+		{
+			reporterr( "\nSpecify initial ram usage by '--initialramusage xGB'\n\n\n" );
+			exit( 1 );
+		}
+		*limitram = tmpd * tanni;
+		reporterr( "Initial RAM usage = %10.3fGB\n", *limitram/1000/1000/1000 );
+		return( 'c' );
+	}
+	else if( !strncmp( string, "very compact", 12 ) )
+	{
+		reporterr( "very compact.\n" );
+		return( 'C' );
+	}
+	else
+	{
+		reporterr( "loadtree.\n" );
+		return( 'l' );
+	}
+}
+
+
+static double sueff1, sueff05;
+//static double sueff1_double, sueff05_double;
+
+// Blend of two distances: the smaller one weighted by sueff1 plus their sum
+// weighted by sueff05 (both weights are file-level statics).
+static double cluster_mix_double( double d1, double d2 )
+{
+	double total = d1 + d2;
+
+	return( MIN( d1, d2 ) * sueff1 + total * sueff05 );
+}
+// Arithmetic mean of two distances.
+static double cluster_average_double( double d1, double d2 )
+{
+	double total = d1 + d2;
+
+	return( total * 0.5 );
+}
+// Smaller of two distances, as selected by the MIN macro.
+static double cluster_minimum_double( double d1, double d2 )
+{
+	double smaller = MIN( d1, d2 );
+
+	return( smaller );
+}
+#if 0
+static double cluster_mix_double( double d1, double d2 )
+{
+ return( MIN( d1, d2 ) * sueff1_double + ( d1 + d2 ) * sueff05_double );
+}
+static double cluster_average_double( double d1, double d2 )
+{
+ return( ( d1 + d2 ) * 0.5 );
+}
+static double cluster_minimum_double( double d1, double d2 )
+{
+ return( MIN( d1, d2 ) );
}
+#endif
+
+// Raise every distance between a member of a group and a non-member to at
+// least the largest distance found inside that group.  Half-matrix layout:
+// the distance between a < b is stored at eff[a][b-a].
+//   eff    : distance matrix, modified in place.
+//   ngroup : number of entries in groups.
+//   groups : groups[i] lists sequence indices and ends with a negative value.
+//   nseq   : number of sequences.
+// Groups whose second entry is -1 are skipped.  At most 100 "closely related"
+// warnings are printed; a summary line follows when more were suppressed.
+static void increaseintergroupdistanceshalfmtx( double **eff, int ngroup, int **groups, int nseq )
+{
+	int nwarned = 0;
+	int i, k, m, s1, s2, sl, ss;
+	int *others, *tft;
+	double maxdist, *dptr, dtmp;
+
+	// Both arrays hold ints, so size them with sizeof( int ) (they used to be
+	// sized with sizeof( int * )).  The extra slot keeps the request non-zero
+	// and guarantees room for the -1 terminator written to others.
+	tft = calloc( nseq + 1, sizeof( int ) );
+	others = calloc( nseq + 1, sizeof( int ) );
+	if( tft == NULL || others == NULL )
+	{
+		reporterr( "Cannot allocate tft or others\n" );
+		exit( 1 );
+	}
+
+	reporterr( "\n" ); // This newline is required.
+	for( i=0; i<ngroup; i++ )
+	{
+		if( groups[i][1] == -1 ) continue;
+
+		// tft[m] == 1 marks members of group i; others collects the rest.
+		for( m=0; m<nseq; m++ ) tft[m] = 0;
+		for( m=0; (s1=groups[i][m])>-1; m++ ) tft[s1] = 1;
+		for( m=0,k=0; m<nseq; m++ ) if( tft[m] == 0 ) others[k++] = m;
+		others[k] = -1;
+
+		// Largest distance between any two members of the group.
+		maxdist = 0.0;
+		for( m=1; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k<m; k++ )
+		{
+			if( s2 > s1 )
+			{
+				sl = s2; ss = s1;
+			}
+			else
+			{
+				sl = s1; ss = s2;
+			}
+			dtmp = eff[ss][sl-ss];
+			if( dtmp > maxdist ) maxdist = dtmp;
+		}
+
+		// Lift every member/non-member distance below maxdist up to maxdist.
+		for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ )
+		{
+			if( s2 > s1 )
+			{
+				sl = s2; ss = s1;
+			}
+			else
+			{
+				sl = s1; ss = s2;
+			}
+			dptr = eff[ss] + sl-ss;
+			if( *dptr < maxdist )
+			{
+				if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 );
+				*dptr = maxdist;
+			}
+		}
+	}
+	if( nwarned > 100 ) reporterr( "# Sequenc.... (more pairs)\n" );
+
+	free( tft );
+	free( others );
+}
+
+// Raise every distance between a member of a group and a non-member to at
+// least the largest distance found inside that group.  Full-matrix layout:
+// the distance between a < b is stored at eff[a][b].
+//   eff    : distance matrix, modified in place.
+//   ngroup : number of entries in groups.
+//   groups : groups[i] lists sequence indices and ends with a negative value.
+//   nseq   : number of sequences.
+// Groups whose second entry is -1 are skipped.  At most 100 "closely related"
+// warnings are printed; a summary line follows when more were suppressed.
+static void increaseintergroupdistancesfullmtx( double **eff, int ngroup, int **groups, int nseq )
+{
+	int nwarned = 0;
+	int i, k, m, s1, s2, sl, ss;
+	int *others, *tft;
+	double maxdist, *dptr, dtmp;
+
+	// Both arrays hold ints, so size them with sizeof( int ) (they used to be
+	// sized with sizeof( int * )).  The extra slot keeps the request non-zero
+	// and guarantees room for the -1 terminator written to others.
+	tft = calloc( nseq + 1, sizeof( int ) );
+	others = calloc( nseq + 1, sizeof( int ) );
+	if( tft == NULL || others == NULL )
+	{
+		reporterr( "Cannot allocate tft or others\n" );
+		exit( 1 );
+	}
+
+	reporterr( "\n" ); // This newline is required.
+	for( i=0; i<ngroup; i++ )
+	{
+		if( groups[i][1] == -1 ) continue;
+
+		// tft[m] == 1 marks members of group i; others collects the rest.
+		for( m=0; m<nseq; m++ ) tft[m] = 0;
+		for( m=0; (s1=groups[i][m])>-1; m++ ) tft[s1] = 1;
+		for( m=0,k=0; m<nseq; m++ ) if( tft[m] == 0 ) others[k++] = m;
+		others[k] = -1;
+
+		// Largest distance between any two members of the group.
+		maxdist = 0.0;
+		for( m=1; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=groups[i][k])>-1&&k<m; k++ )
+		{
+			if( s2 > s1 )
+			{
+				sl = s2; ss = s1;
+			}
+			else
+			{
+				sl = s1; ss = s2;
+			}
+			dtmp = eff[ss][sl];
+			if( dtmp > maxdist ) maxdist = dtmp;
+		}
+
+		// Lift every member/non-member distance below maxdist up to maxdist.
+		for( m=0; (s2=groups[i][m])>-1; m++ ) for( k=0; (s1=others[k])>-1; k++ )
+		{
+			if( s2 > s1 )
+			{
+				sl = s2; ss = s1;
+			}
+			else
+			{
+				sl = s1; ss = s2;
+			}
+			dptr = eff[ss] + sl;
+			if( *dptr < maxdist )
+			{
+				if( *dptr < 0.5 && nwarned++ < 100 ) reporterr( "# Sequences %d and %d seem to be closely related, but are not in the same sub MSA (%d) in your setting.\n", s2+1, s1+1, i+1 );
+				*dptr = maxdist;
+			}
+		}
+	}
+	if( nwarned > 100 ) reporterr( "# Sequenc.... (more pairs)\n" );
+
+	free( tft );
+	free( others );
+}
-void fixed_musclesupg_float_realloc_nobk_halfmtx( int nseq, float **eff, int ***topol, float **len, Treedep *dep )
+void fixed_supg_double_realloc_nobk_halfmtx_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int ngroup, int **groups, int efffree )
{
int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
- float tmpfloat;
- float eff1, eff0;
- static float *tmptmplen = NULL;
- static int *hist = NULL;
- static Bchain *ac = NULL;
+ double tmpdouble;
+ double eff1, eff0;
+ double *tmptmplen = NULL; //static?
+ int *hist = NULL; //static?
+ Bchain *ac = NULL; //static?
int im = -1, jm = -1;
Bchain *acjmnext, *acjmprev;
int prevnode;
- Bchain *acpti;
+ Bchain *acpti, *acptj;
int *pt1, *pt2, *pt11, *pt22;
- static int *nmemar;
+ int *nmemar; //static?
int nmemim, nmemjm;
- float minscore;
-// float sueff1 = 1 - SUEFF;
-// float sueff05 = SUEFF * 0.5;
- int *nearest = NULL; // by Mathog, a guess
- float *mindisfrom = NULL; // by Mathog, a guess
- float (*clusterfuncpt[1])(float,float);
-
-
- sueff1 = 1 - SUEFF;
- sueff05 = SUEFF * 0.5;
+ double minscore;
+ int *nearest = NULL; // by D.Mathog, a guess
+ double *mindisfrom = NULL; // by D.Mathog, a guess
+ char **tree; //static?
+ char *treetmp; //static?
+ char *nametmp, *nameptr, *tmpptr; //static?
+ FILE *fp;
+ double (*clusterfuncpt[1])(double,double);
+ char namec;
+ int *testtopol, **inconsistent;
+ int **inconsistentpairlist;
+ int ninconsistentpairs;
+ int *warned;
+ int allinconsistent;
+ int firsttime;
+
+ increaseintergroupdistanceshalfmtx( eff, ngroup, groups, nseq );
+
+ sueff1 = 1 - (double)sueff_global;
+ sueff05 = (double)sueff_global * 0.5;
if ( treemethod == 'X' )
- clusterfuncpt[0] = cluster_mix_float;
+ clusterfuncpt[0] = cluster_mix_double;
else if ( treemethod == 'E' )
- clusterfuncpt[0] = cluster_average_float;
+ clusterfuncpt[0] = cluster_average_double;
else if ( treemethod == 'q' )
- clusterfuncpt[0] = cluster_minimum_float;
+ clusterfuncpt[0] = cluster_minimum_double;
else
{
- fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
+ reporterr( "Unknown treemethod, %c\n", treemethod );
exit( 1 );
}
nmemar = AllocateIntVec( njob );
mindisfrom = AllocateFloatVec( njob );
nearest = AllocateIntVec( njob );
+// treetmp = AllocateCharVec( njob * ( B + 100 ) ); // nagasugi?
+ treetmp = NULL; // kentou 2013/06/12
+ nametmp = AllocateCharVec( 1000 ); // nagasugi
+// tree = AllocateCharMtx( njob, njob*600 );
+ tree = AllocateCharMtx( njob, 0 );
+ testtopol = AllocateIntVec( njob + 1 );
+ inconsistent = AllocateIntMtx( njob, njob ); // muda
+// inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // muda!
+ inconsistentpairlist = AllocateIntMtx( 1, 2 );
+// reporterr( "allocating inconsistentpairlist\n" );
+ warned = AllocateIntVec( ngroup );
}
for( i=0; i<nseq; i++ )
{
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
+ {
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
+
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
+ if( tree[i] == NULL )
+ {
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
+ }
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
+ }
+ for( i=0; i<nseq; i++ )
+ {
ac[i].next = ac+i+1;
ac[i].prev = ac+i-1;
ac[i].pos = i;
nmemar[i] = 1;
}
- fprintf( stderr, "\n" );
+ reporterr( "\n" );
+ ninconsistentpairs = 0;
for( k=0; k<nseq-1; k++ )
{
- if( k % 10 == 0 ) fprintf( stderr, "\r% 5d / %d", k, nseq );
-
- minscore = 999.9;
- for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
- {
- i = acpti->pos;
-// fprintf( stderr, "k=%d i=%d\n", k, i );
- if( mindisfrom[i] < minscore ) // muscle
- {
- im = i;
- minscore = mindisfrom[i];
- }
- }
- jm = nearest[im];
- if( jm < im )
- {
- j=jm; jm=im; im=j;
- }
+ if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
-
- prevnode = hist[im];
- if( dep ) dep[k].child0 = prevnode;
- nmemim = nmemar[im];
- intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
- if( prevnode == -1 )
+ for( i=0; i<ninconsistentpairs; i++ ) inconsistent[inconsistentpairlist[i][0]][inconsistentpairlist[i][1]] = 0;
+// for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0; // osoi!!!
+ ninconsistentpairs = 0;
+ firsttime = 1;
+ while( 1 )
{
- *intpt++ = im;
- *intpt = -1;
- }
- else
- {
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
+ if( firsttime )
{
- pt11 = pt2;
- pt22 = pt1;
+ firsttime = 0;
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ if( mindisfrom[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindisfrom[i];
+ }
+ }
+ jm = nearest[im];
+ if( jm < im )
+ {
+ j=jm; jm=im; im=j;
+ }
+ }
+ else
+ {
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
+ {
+ j = acptj->pos;
+ if( !inconsistent[i][j] && (tmpdouble=eff[i][j-i]) < minscore )
+ {
+ minscore = tmpdouble;
+ im = i; jm = j;
+ }
+ }
+ for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next )
+ {
+ j = acptj->pos;
+ if( !inconsistent[j][i] && (tmpdouble=eff[j][i-j]) < minscore )
+ {
+ minscore = tmpdouble;
+ im = j; jm = i;
+ }
+ }
+ }
+ }
+
+
+ allinconsistent = 1;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
+ {
+ for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
+ {
+ if( inconsistent[acpti->pos][acptj->pos] == 0 )
+ {
+ allinconsistent = 0;
+ goto exitloop_f;
+ }
+ }
+ }
+ exitloop_f:
+
+ if( allinconsistent )
+ {
+ reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" );
+ exit( 1 );
+ }
+#if 1
+ intpt = testtopol;
+ prevnode = hist[im];
+ if( prevnode == -1 )
+ {
+ *intpt++ = im;
+ }
+ else
+ {
+ for( intpt2=topol[prevnode][0]; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ }
+
+ prevnode = hist[jm];
+ if( prevnode == -1 )
+ {
+ *intpt++ = jm;
+ }
+ else
+ {
+ for( intpt2=topol[prevnode][0]; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ }
+ *intpt = -1;
+// reporterr( "testtopol = \n" );
+// for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 );
+// reporterr( "\n" );
+#endif
+ for( i=0; i<ngroup; i++ )
+ {
+// reporterr( "groups[%d] = \n", i );
+// for( j=0; groups[i][j]>-1; j++ ) reporterr( " %03d", groups[i][j]+1 );
+// reporterr( "\n" );
+ if( overlapmember( groups[i], testtopol ) )
+ {
+ if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) )
+ {
+ if( !warned[i] )
+ {
+ warned[i] = 1;
+ reporterr( "\n###################################################################\n" );
+ reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 );
+ reporterr( "###################################################################\n" );
+ }
+ inconsistent[im][jm] = 1;
+
+ inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) );
+ inconsistentpairlist[ninconsistentpairs] = malloc( sizeof( int ) * 2 );
+ reporterr( "reallocating inconsistentpairlist, size=%d\n", ninconsistentpairs+1 );
+ inconsistentpairlist[ninconsistentpairs][0] = im;
+ inconsistentpairlist[ninconsistentpairs][1] = jm;
+ ninconsistentpairs++;
+ break;
+ }
+ }
+ }
+ if( i == ngroup )
+ {
+// reporterr( "OK\n" );
+ break;
+ }
+ }
+
+
+ prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
+ nmemim = nmemar[im];
+ intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
+ if( prevnode == -1 )
+ {
+ *intpt++ = im;
+ *intpt = -1;
+ }
+ else
+ {
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
}
else
{
intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
if( !intpt )
{
- fprintf( stderr, "Cannot reallocate topol\n" );
+ reporterr( "Cannot reallocate topol\n" );
exit( 1 );
}
if( prevnode == -1 )
len[k][0] = ( minscore - tmptmplen[im] );
len[k][1] = ( minscore - tmptmplen[jm] );
+ if( len[k][0] < 0.0 ) len[k][0] = 0.0;
+ if( len[k][1] < 0.0 ) len[k][1] = 0.0;
+
+ if( dep ) dep[k].distfromtip = minscore;
+// reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
tmptmplen[im] = minscore;
nmemar[im] = nmemim + nmemjm;
mindisfrom[im] = 999.9;
+ eff[im][jm-im] = 999.9;
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
i = acpti->pos;
}
eff0 = eff[miniim][maxiim-miniim];
eff1 = eff[minijm][maxijm-minijm];
- tmpfloat = eff[miniim][maxiim-miniim] =
#if 0
+ tmpdouble = eff[miniim][maxiim-miniim] =
MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
#else
+ tmpdouble = eff[miniim][maxiim-miniim] =
(clusterfuncpt[0])( eff0, eff1 );
#endif
- if( tmpfloat < mindisfrom[i] )
+#if 1
+ if( tmpdouble < mindisfrom[i] )
{
- mindisfrom[i] = tmpfloat;
+ mindisfrom[i] = tmpdouble;
nearest[i] = im;
}
- if( tmpfloat < mindisfrom[im] )
+ if( tmpdouble < mindisfrom[im] )
{
- mindisfrom[im] = tmpfloat;
+ mindisfrom[im] = tmpdouble;
nearest[im] = i;
}
if( nearest[i] == jm )
{
nearest[i] = im;
}
+#endif
}
}
-// fprintf( stderr, "im,jm=%d,%d\n", im, jm );
+ treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
+ if( !treetmp )
+ {
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
+ strcpy( tree[im], treetmp );
+
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
acjmprev->next = acjmnext;
if( acjmnext != NULL )
acjmnext->prev = acjmprev;
- free( (void *)eff[jm] ); eff[jm] = NULL;
+ if( efffree )
+ {
+ free( (void *)eff[jm] ); eff[jm] = NULL;
+ }
#if 1 // muscle seems to miss this.
for( acpti=ac; acpti!=NULL; acpti=acpti->next )
i = acpti->pos;
if( nearest[i] == im )
{
-// fprintf( stderr, "calling setnearest\n" );
- setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ }
+ if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
+ setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
}
}
#endif
#if 0
- fprintf( stdout, "vSTEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
- fprintf( stdout, "\n" );
+ reporterr( "\noSTEP-%03d:\n", k+1 );
+ reporterr( "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i]+1 );
+ reporterr( "\n" );
+ reporterr( "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i]+1 );
+ reporterr( "\n\n" );
#endif
}
+ fp = fopen( "infile.tree", "w" );
+ fprintf( fp, "%s\n", treetmp );
+ fclose( fp );
+
+ free( tree[0] );
+ free( tree );
+ free( treetmp );
+ free( nametmp );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
free( (void *)nmemar ); nmemar = NULL;
free( mindisfrom );
free( nearest );
+ free( testtopol );
+ FreeIntMtx( inconsistent );
+ FreeIntMtx( inconsistentpairlist );
+ free( warned );
}
+// Count occurrences: for every value in the END_OF_VEC-terminated list
+// pointt, increment the matching slot of table.
+void makecompositiontable_global( int *table, int *pointt )
+{
+	int *cursor;
+
+	for( cursor=pointt; *cursor!=END_OF_VEC; cursor++ )
+		table[*cursor] += 1;
+}
+typedef struct _resetnearestthread_arg // Argument bundle passed (as void *) to msaresetnearestthread() and kmerresetnearestthread().
+{
+ int para; // nonzero: workers lock/unlock *mutex around the shared cursor (enablemultithread builds only)
+// int thread_no;
+ int im; // index tested against nearest[i]; also selects the row partmtx[im]
+ int nseq; // forwarded unchanged to msaresetnearest() / kmerresetnearest()
+ double **partmtx; // distance rows; partmtx[im][i] is compared with mindist[i]
+ double *mindist; // indexed by list position i; &mindist[i] is handed to the reset helper
+ int *nearest; // indexed by list position i; &nearest[i] is handed to the reset helper
+ char **seq; // read only by msaresetnearestthread (forwarded to msaresetnearest)
+ int **skiptable; // read only by msaresetnearestthread (forwarded to msaresetnearest)
+ int *tselfscore; // forwarded by both workers
+ int **pointt; // read only by kmerresetnearestthread; pointt[i] feeds makecompositiontable_global()
+ int *nlen; // read only by kmerresetnearestthread (forwarded to kmerresetnearest)
+ double *result; // forwarded by both workers
+ int *joblist; // forwarded by both workers
+ Bchain **acpt; // shared cursor: each worker takes *acpt and advances it to (*acpt)->next
+ Bchain *ac; // forwarded by both workers
+#ifdef enablemultithread
+ pthread_mutex_t *mutex; // held while *acpt is read and advanced, when para is nonzero
+#endif
+} resetnearestthread_arg_t;
+// Worker routine taking a resetnearestthread_arg_t * (as void *).
+// Repeatedly takes the next element from the Bchain list shared through
+// *targ->acpt; for an element at position i whose recorded nearest is im and
+// for which partmtx[im][i] > mindist[i], calls msaresetnearest() to refresh
+// mindist[i] and nearest[i].
+// When targ->para is nonzero (and enablemultithread is defined) the shared
+// cursor is only touched while targ->mutex is held.
+// Returns NULL once the cursor has reached the end of the list.
+static void *msaresetnearestthread( void *arg )
+{
+	resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg;
+//	int thread_no = targ->thread_no;
+	int para = targ->para;
+	int im = targ->im;
+	int nseq = targ->nseq;
+	double **partmtx = targ->partmtx;
+	double *mindist = targ->mindist;
+	int *nearest = targ->nearest;
+	char **seq = targ->seq;
+	int **skiptable = targ->skiptable;
+	int *tselfscore = targ->tselfscore;
+	double *result = targ->result;
+	int *joblist = targ->joblist;
+	Bchain **acpt = targ->acpt;
+	Bchain *ac = targ->ac;
+
+	Bchain *acptbk;
+	int i;
+
+	// The former 'acptinit = *acpt' was dropped: its value was never used and
+	// it read the shared cursor without holding the mutex.
+	while( 1 )
+	{
+#ifdef enablemultithread
+		if( para ) pthread_mutex_lock( targ->mutex );
+#endif
+		if( *acpt == NULL )
+		{
+#ifdef enablemultithread
+			if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+			commonsextet_p( NULL, NULL );
+			return( NULL );
+		}
+		// Claim the current element and advance the cursor for the next caller.
+		acptbk = *acpt;
+		*acpt = (*acpt)->next;
+
+#ifdef enablemultithread
+		if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+		i = acptbk->pos;
+		if( nearest[i] == im )
+		{
+			if( partmtx[im][i] > mindist[i] )
+			{
+				msaresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, result, joblist );
+			}
+		}
+	}
+}
-void veryfastsupg_double_loadtop( int nseq, double **eff, int ***topol, double **len ) // BUG!!!
+// Worker routine taking a resetnearestthread_arg_t * (as void *).
+// Repeatedly takes the next element from the Bchain list shared through
+// *targ->acpt; for an element at position i whose recorded nearest is im and
+// for which partmtx[im][i] > mindist[i], calls kmerresetnearest() to refresh
+// mindist[i] and nearest[i].  When pointt is non-NULL a composition table for
+// pointt[i] is built for that call and released afterwards.
+// When targ->para is nonzero (and enablemultithread is defined) the shared
+// cursor is only touched while targ->mutex is held.
+// Returns NULL once the cursor has reached the end of the list.
+static void *kmerresetnearestthread( void *arg )
{
- int i, k, miniim, maxiim, minijm, maxijm;
- int *intpt, *intpt2;
- double eff1, eff0;
- static double *tmptmplen = NULL;
- static int *hist = NULL;
- static Achain *ac = NULL;
- double minscore;
- static char **tree;
- static char *treetmp;
- int im = -1, jm = -1;
- int prevnode, acjmnext, acjmprev;
- int *pt1, *pt2, *pt11, *pt22;
- FILE *fp;
- int node[2];
- float dumfl[2];
+ resetnearestthread_arg_t *targ = (resetnearestthread_arg_t *)arg;
+// int thread_no = targ->thread_no;
+ int para = targ->para;
+ int im = targ->im;
+ int nseq = targ->nseq;
+ double **partmtx = targ->partmtx;
+ double *mindist = targ->mindist;
+ int *nearest = targ->nearest;
+ int *tselfscore = targ->tselfscore;
+ int **pointt = targ->pointt;
+ int *nlen = targ->nlen;
+ double *result = targ->result;
+ int *joblist = targ->joblist;
+ Bchain **acpt = targ->acpt;
+ Bchain *ac = targ->ac;
+
+ int *singlettable1 = NULL; // stays NULL when pointt is NULL, so an indeterminate pointer is never passed on
+
+ Bchain *acptbk;
- fp = fopen( "_guidetree", "r" );
- if( !fp )
- {
- fprintf( stderr, "cannot open _guidetree\n" );
- exit( 1 );
- }
+ int i;
- if( !hist )
- {
- treetmp = AllocateCharVec( njob*50 );
- tree = AllocateCharMtx( njob, njob*50 );
- hist = AllocateIntVec( njob );
- tmptmplen = (double *)malloc( njob * sizeof( double ) );
- ac = (Achain *)malloc( njob * sizeof( Achain ) );
- }
- for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
-
- for( i=0; i<nseq; i++ )
+// The former 'acptinit = *acpt' was dropped: its value was never used and it read the shared cursor without holding the mutex.
+ while( 1 )
{
- ac[i].next = i+1;
- ac[i].prev = i-1;
-// ac[i].curr = i;
- }
- ac[nseq-1].next = -1;
-
- for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
- for( i=0; i<nseq; i++ ) hist[i] = -1;
-
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
+#ifdef enablemultithread
+ if( para ) pthread_mutex_lock( targ->mutex );
+#endif
+ if( *acpt == NULL )
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+ commonsextet_p( NULL, NULL );
+ return( NULL );
+ }
+ acptbk = *acpt; // claim the current element ...
+ *acpt = (*acpt)->next; // ... and advance the cursor for the next caller
-#if 0
- minscore = 99999.9;
- for( i=0; ac[i].next!=-1; i=ac[i].next )
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+ i = acptbk->pos;
+ if( nearest[i] == im )
{
- for( j=ac[i].next; j!=-1; j=ac[j].next )
- {
- tmpdouble = eff[i][j];
- if( tmpdouble < minscore )
+ if( partmtx[im][i] > mindist[i] )
+ {
+ if( pointt ) // kmer
{
- minscore = tmpdouble;
- im = i; jm = j;
+ singlettable1 = (int *)calloc( tsize, sizeof( int ) );
+ if( singlettable1 == NULL )
+ {
+ reporterr( "Cannot allocate singlettable1\n" );
+ exit( 1 );
+ }
+ makecompositiontable_global( singlettable1, pointt[i] );
}
+ kmerresetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, tselfscore, pointt, nlen, singlettable1, result, joblist );
+ free( singlettable1 ); // kmer; free( NULL ) is a no-op when pointt is NULL
+ singlettable1 = NULL;
+ if( pointt ) commonsextet_p( NULL, NULL );
}
}
-#else
- dumfl[0] = dumfl[1] = -1.0;
- loadtreeoneline( node, dumfl, fp );
- im = node[0];
- jm = node[1];
- minscore = eff[im][jm];
-
-// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
+ }
+}
- if( dumfl[0] != -1.0 || dumfl[1] != -1.0 )
- {
- fprintf( stderr, "\n\nBranch length should not given.\n" );
- exit( 1 );
- }
+typedef struct _compactdistarrthread_arg // Argument bundle passed (as void *) to the *distarrthreadjoblist workers.
+{
+ int para; // nonzero: workers lock/unlock *mutex around *posshared (enablemultithread builds only)
+ int njob; // upper bound for positions in joblist
+// int thread_no;
+ int im; // first index of the pair; joblist entries equal to im are skipped
+ int jm; // second index of the pair; joblist entries equal to jm are skipped
+ int *nlen; // nlen[im], nlen[jm] and nlen[i] are passed to distcompact()
+ char **seq; // passed to distcompact_msa() by the msa worker; not read by the kmer workers
+ int **skiptable; // passed to distcompact_msa() by the msa worker; not read by the kmer workers
+ int **pointt; // pointt[i] is passed to distcompact()
+ int *table1; // passed to distcompact() together with im
+ int *table2; // passed to distcompact() together with jm
+ int *tselfscore; // tselfscore[im], tselfscore[jm] and tselfscore[i] are passed to the distance routines
+ Bchain **acpt; // NOTE(review): not read by the kmer workers that use this struct here -- confirm whether still needed
+ int *posshared; // shared position in joblist; each claim advances it by BLOCKSIZE
+ double *mindist; // mindist[i] is lowered when the blended distance is smaller
+ double *newarr; // newarr[i] receives the cluster_mix_double() blend of the two distances
+ double **partmtx; // stored rows, consulted and updated by kmerdistarrthreadjoblist; unused by the verycompact kmer worker
+ int *nearest; // nearest[i] is set to im when mindist[i] is lowered or when it was jm
+ int *joblist; // indices to process
+#ifdef enablemultithread
+ pthread_mutex_t *mutex; // held while *posshared is read and advanced, when para is nonzero
#endif
+} compactdistarrthread_arg_t;
+
+static void *verycompactkmerdistarrthreadjoblist( void *arg ) // Worker taking a compactdistarrthread_arg_t *: for each joblist entry i other than im/jm, computes distances to im and jm with distcompact(), stores their cluster_mix_double() blend in newarr[i] and updates mindist[i]/nearest[i]; returns NULL when joblist is exhausted. Also used when enablemultithread == 0.
+{
+ compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg;
+ int njob = targ->njob;
+ int para = targ->para;
+ int im = targ->im;
+ int jm = targ->jm;
+// int thread_no = targ->thread_no;
+ int *nlen = targ->nlen;
+ int **pointt = targ->pointt;
+ int *table1 = targ->table1;
+ int *table2 = targ->table2;
+ int *tselfscore = targ->tselfscore;
+ int *joblist = targ->joblist;
+ int *posshared = targ->posshared;
+ double *mindist = targ->mindist;
+ int *nearest = targ->nearest;
+// double **partmtx = targ->partmtx;
+ double *newarr = targ->newarr;
+ int i, posinjoblist, n;
+
+ double tmpdist1;
+ double tmpdist2;
+ double tmpdouble;
-// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
+// for( acpti=ac; acpti!=NULL; acpti=acpti->next )
- intpt = topol[k][0];
- prevnode = hist[im];
- if( prevnode == -1 )
+ while( 1 ) // keep claiming blocks of joblist until the shared position reaches njob
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_lock( targ->mutex );
+#endif
+ if( *posshared >= njob ) // >= rather than == because the position advances a whole block at a time
{
- *intpt++ = im;
- *intpt = -1;
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+ commonsextet_p( NULL, NULL );
+ return( NULL );
}
- else
+ posinjoblist = *posshared; // first joblist position of the block claimed here
+ *posshared += BLOCKSIZE;
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+
+ for( n=0; n<BLOCKSIZE&&posinjoblist<njob; n++ )
{
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
+ i = joblist[posinjoblist++];
+
+ if( i == im ) continue; // the pair itself is not compared against itself
+ if( i == jm ) continue;
+
+// if( partmtx[im] )
+// tmpdist1 = partmtx[im][i];
+// else if( partmtx[i] )
+// tmpdist1 = partmtx[i][im];
+// else
+ tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] );
+
+// if( partmtx[jm] )
+// tmpdist2 = partmtx[jm][i];
+// else if( partmtx[i] )
+// tmpdist2 = partmtx[i][jm];
+// else
+ tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] );
+
+// if( seq )
+// {
+// tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] );
+// tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] );
+// }
+// else
+// {
+// tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] );
+// tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] );
+// }
+ tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 );
+ newarr[i] = tmpdouble;
+
+// if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i];
+
+ if( tmpdouble < mindist[i] ) // the blended distance beats the recorded minimum: record im as nearest of i
{
- pt11 = pt2;
- pt22 = pt1;
+ mindist[i] = tmpdouble;
+ nearest[i] = im;
}
- else
+
+// if( tmpdouble < mindist[im] ) // not possible here
+// {
+// mindist[im] = tmpdouble;
+// nearest[im] = i;
+// }
+
+ if( nearest[i] == jm ) // redirect a nearest entry that pointed at jm so it points at im
{
- pt11 = pt1;
- pt22 = pt2;
+ nearest[i] = im;
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
}
+ }
+}
- intpt = topol[k][1];
- prevnode = hist[jm];
- if( prevnode == -1 )
+static void *kmerdistarrthreadjoblist( void *arg ) // Worker taking a compactdistarrthread_arg_t *: for each joblist entry i other than im/jm, obtains distances to im and jm (stored partmtx value when a row exists, else distcompact()), stores their cluster_mix_double() blend in newarr[i] and updates partmtx[i], mindist[i] and nearest[i]; returns NULL when joblist is exhausted. Also used when enablemultithread == 0.
+{
+ compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg;
+ int njob = targ->njob;
+ int para = targ->para;
+ int im = targ->im;
+ int jm = targ->jm;
+// int thread_no = targ->thread_no;
+ int *nlen = targ->nlen;
+ int **pointt = targ->pointt;
+ int *table1 = targ->table1;
+ int *table2 = targ->table2;
+ int *tselfscore = targ->tselfscore;
+ int *joblist = targ->joblist;
+ int *posshared = targ->posshared;
+ double *mindist = targ->mindist;
+ int *nearest = targ->nearest;
+ double **partmtx = targ->partmtx;
+ double *newarr = targ->newarr;
+ int i, posinjoblist, n;
+
+ double tmpdist1;
+ double tmpdist2;
+ double tmpdouble;
+
+// for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+
+ while( 1 ) // keep claiming blocks of joblist until the shared position reaches njob
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_lock( targ->mutex );
+#endif
+ if( *posshared >= njob ) // >= rather than == because the position advances a whole block at a time
{
- *intpt++ = jm;
- *intpt = -1;
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+ commonsextet_p( NULL, NULL );
+ return( NULL );
}
- else
+ posinjoblist = *posshared; // first joblist position of the block claimed here
+ *posshared += BLOCKSIZE;
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+
+ for( n=0; n<BLOCKSIZE&&posinjoblist<njob; n++ )
{
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
- {
- pt11 = pt2;
- pt22 = pt1;
- }
+ i = joblist[posinjoblist++];
+
+ if( i == im ) continue; // the pair itself is not compared against itself
+ if( i == jm ) continue;
+
+ if( partmtx[im] ) // prefer a stored distance: row im first, then row i, otherwise recompute
+ tmpdist1 = partmtx[im][i];
+ else if( partmtx[i] )
+ tmpdist1 = partmtx[i][im];
+ else
+ tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] );
+
+ if( partmtx[jm] ) // same lookup order for the distance to jm
+ tmpdist2 = partmtx[jm][i];
+ else if( partmtx[i] )
+ tmpdist2 = partmtx[i][jm];
else
+ tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] );
+
+// if( seq )
+// {
+// tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] );
+// tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] );
+// }
+// else
+// {
+// tmpdist1 = distcompact( nlen[im], nlen[i], table1, pointt[i], tselfscore[im], tselfscore[i] );
+// tmpdist2 = distcompact( nlen[jm], nlen[i], table2, pointt[i], tselfscore[jm], tselfscore[i] );
+// }
+ tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 );
+ newarr[i] = tmpdouble;
+
+ if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // when row i is stored, write the blend into both the im and jm columns
+
+ if( tmpdouble < mindist[i] ) // the blended distance beats the recorded minimum: record im as nearest of i
{
- pt11 = pt1;
- pt22 = pt2;
+ mindist[i] = tmpdouble;
+ nearest[i] = im;
+ }
+
+// if( tmpdouble < mindist[im] ) // not possible here
+// {
+// mindist[im] = tmpdouble;
+// nearest[im] = i;
+// }
+
+ if( nearest[i] == jm ) // redirect a nearest entry that pointed at jm so it points at im
+ {
+ nearest[i] = im;
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
}
+ }
+}
- minscore *= 0.5;
+static void *verycompactmsadistarrthreadjoblist( void *arg ) // Worker that fills newarr[] for the entries of joblist; also used when enablemultithread == 0
+{ // arg points to a compactdistarrthread_arg_t; the function always returns NULL
+ compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg;
+ int njob = targ->njob; // local bound on joblist positions, taken from the argument struct
+ int para = targ->para; // non-zero: accesses to *posshared are guarded by targ->mutex
+ int im = targ->im; // the pair (im, jm) whose distances to every other entry are combined; results are recorded under im
+ int jm = targ->jm;
+// int thread_no = targ->thread_no;
+ int *tselfscore = targ->tselfscore;
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ int *joblist = targ->joblist; // indices to process
+ int *posshared = targ->posshared; // cursor into joblist, shared by all workers
+ double *mindist = targ->mindist; // per-index smallest distance seen so far (updated here)
+ int *nearest = targ->nearest; // per-index partner belonging to mindist (updated here)
+// double **partmtx = targ->partmtx;
+ double *newarr = targ->newarr; // output: combined distance for each processed index
+ int i, posinjoblist, n;
+
+ double tmpdist1;
+ double tmpdist2;
+ double tmpdouble;
- len[k][0] = minscore - tmptmplen[im];
- len[k][1] = minscore - tmptmplen[jm];
+// for( acpti=ac; acpti!=NULL; acpti=acpti->next )
- if( len[k][0] < 0.0 ) len[k][0] = 0.0;
- if( len[k][1] < 0.0 ) len[k][1] = 0.0;
- tmptmplen[im] = minscore;
+ while( 1 ) // repeat: claim a block of joblist positions, process it, stop when none are left
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_lock( targ->mutex );
+#endif
+ if( *posshared >= njob ) // everything is already claimed; '>=' because the cursor advances a whole block at a time
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+ commonsextet_p( NULL, NULL ); // NOTE(review): presumably releases internal buffers of commonsextet_p - confirm against its definition
+ return( NULL );
+ }
+ posinjoblist = *posshared; // first position of the block claimed by this worker
+ *posshared += BLOCKSIZE; // move the shared cursor past the claimed block
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
- hist[im] = k;
+ for( n=0; n<BLOCKSIZE&&posinjoblist<njob; n++ ) // at most BLOCKSIZE entries, clipped at the end of joblist
+ {
+ i = joblist[posinjoblist++];
+
+ if( i == im ) continue; // im and jm themselves are skipped
+ if( i == jm ) continue;
+
+// if( partmtx[im] )
+// tmpdist1 = partmtx[im][i];
+// else if( partmtx[i] )
+// tmpdist1 = partmtx[i][im];
+// else
+ tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] ); // distance im-i, always recomputed (the partmtx lookup is disabled in this variant)
+
+// if( partmtx[jm] )
+// tmpdist2 = partmtx[jm][i];
+// else if( partmtx[i] )
+// tmpdist2 = partmtx[i][jm];
+// else
+ tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] ); // distance jm-i, always recomputed
+
+ tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 ); // combine the two distances into a single value for i
+ newarr[i] = tmpdouble;
+
+// if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i];
+
+ if( tmpdouble < mindist[i] ) // the new value beats i's recorded minimum, so im becomes i's nearest
+ {
+ mindist[i] = tmpdouble;
+ nearest[i] = im;
+ }
+
+// if( tmpdouble < mindist[im] ) // cannot be done here
+// {
+// mindist[im] = tmpdouble;
+// nearest[im] = i;
+// }
+
+ if( nearest[i] == jm ) // entries that pointed at jm are redirected to im
+ {
+ nearest[i] = im;
+ }
+ }
+ }
+}
- for( i=0; i!=-1; i=ac[i].next )
- {
- if( i != im && i != jm )
- {
- if( i < im )
- {
- miniim = i;
- maxiim = im;
- minijm = i;
- maxijm = jm;
- }
- else if( i < jm )
- {
- miniim = im;
- maxiim = i;
- minijm = i;
- maxijm = jm;
- }
- else
- {
- miniim = im;
- maxiim = i;
- minijm = jm;
- maxijm = i;
- }
- eff0 = eff[miniim][maxiim];
- eff1 = eff[minijm][maxijm];
- eff[miniim][maxiim] =
- MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
- ( eff0 + eff1 ) * 0.5 * SUEFF;
- }
- }
- acjmprev = ac[jm].prev;
- acjmnext = ac[jm].next;
- ac[acjmprev].next = acjmnext;
- if( acjmnext != -1 )
- ac[acjmnext].prev = acjmprev;
+static void *msadistarrthreadjoblist( void *arg ) // Worker that fills newarr[] for the entries of joblist, reusing stored partmtx rows where present; also used when enablemultithread == 0
+{ // arg points to a compactdistarrthread_arg_t; the function always returns NULL
+ compactdistarrthread_arg_t *targ = (compactdistarrthread_arg_t *)arg;
+ int njob = targ->njob; // local bound on joblist positions, taken from the argument struct
+ int para = targ->para; // non-zero: accesses to *posshared are guarded by targ->mutex
+ int im = targ->im; // the pair (im, jm) whose distances to every other entry are combined; results are recorded under im
+ int jm = targ->jm;
+// int thread_no = targ->thread_no;
+ int *tselfscore = targ->tselfscore;
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ int *joblist = targ->joblist; // indices to process
+ int *posshared = targ->posshared; // cursor into joblist, shared by all workers
+ double *mindist = targ->mindist; // per-index smallest distance seen so far (updated here)
+ int *nearest = targ->nearest; // per-index partner belonging to mindist (updated here)
+ double **partmtx = targ->partmtx; // per-index distance rows; an entry may be NULL when no row is stored
+ double *newarr = targ->newarr; // output: combined distance for each processed index
+ int i, posinjoblist, n;
+
+ double tmpdist1;
+ double tmpdist2;
+ double tmpdouble;
- sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
- strcpy( tree[im], treetmp );
-#if 0
- fprintf( stdout, "STEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
- fprintf( stdout, "\n" );
-#endif
- }
- fclose( fp );
+// for( acpti=ac; acpti!=NULL; acpti=acpti->next )
- fp = fopen( "infile.tree", "w" );
- fprintf( fp, "%s\n", treetmp );
-// fprintf( fp, "by veryfastsupg_double_loadtop\n" );
- fclose( fp );
-#if 1
- fprintf( stderr, "\n" );
- free( (void *)tmptmplen ); tmptmplen = NULL;
- free( hist ); hist = NULL;
- free( (char *)ac ); ac = NULL;
- FreeCharMtx( tree );
- free( treetmp );
+ while( 1 ) // repeat: claim a block of joblist positions, process it, stop when none are left
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_lock( targ->mutex );
+#endif
+ if( *posshared >= njob ) // everything is already claimed; '>=' because the cursor advances a whole block at a time
+ {
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
+#endif
+ commonsextet_p( NULL, NULL ); // NOTE(review): presumably releases internal buffers of commonsextet_p - confirm against its definition
+ return( NULL );
+ }
+ posinjoblist = *posshared; // first position of the block claimed by this worker
+ *posshared += BLOCKSIZE; // move the shared cursor past the claimed block
+#ifdef enablemultithread
+ if( para ) pthread_mutex_unlock( targ->mutex );
#endif
+
+ for( n=0; n<BLOCKSIZE&&posinjoblist<njob; n++ ) // at most BLOCKSIZE entries, clipped at the end of joblist
+ {
+ i = joblist[posinjoblist++];
+
+ if( i == im ) continue; // im and jm themselves are skipped
+ if( i == jm ) continue;
+
+ if( partmtx[im] ) // distance im-i: prefer a stored value (row im first, then row i), otherwise compute it
+ tmpdist1 = partmtx[im][i];
+ else if( partmtx[i] )
+ tmpdist1 = partmtx[i][im];
+ else
+ tmpdist1 = distcompact_msa( seq[im], seq[i], skiptable[im], skiptable[i], tselfscore[im], tselfscore[i] );
+
+ if( partmtx[jm] ) // distance jm-i: same lookup order as above
+ tmpdist2 = partmtx[jm][i];
+ else if( partmtx[i] )
+ tmpdist2 = partmtx[i][jm];
+ else
+ tmpdist2 = distcompact_msa( seq[jm], seq[i], skiptable[jm], skiptable[i], tselfscore[jm], tselfscore[i] );
+
+ tmpdouble = cluster_mix_double( tmpdist1, tmpdist2 ); // combine the two distances into a single value for i
+ newarr[i] = tmpdouble;
+
+ if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // when i has a stored row, overwrite its im and jm columns with the new value
+
+ if( tmpdouble < mindist[i] ) // the new value beats i's recorded minimum, so im becomes i's nearest
+ {
+ mindist[i] = tmpdouble;
+ nearest[i] = im;
+ }
+
+// if( tmpdouble < mindist[im] ) // cannot be done here
+// {
+// mindist[im] = tmpdouble;
+// nearest[im] = i;
+// }
+
+ if( nearest[i] == jm ) // entries that pointed at jm are redirected to im
+ {
+ nearest[i] = im;
+ }
+ }
+ }
}
-void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len )
+void compacttree_memsaveselectable( int nseq, double **partmtx, int *nearest, double *mindist, int **pointt, int *tselfscore, char **seq, int **skiptable, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int treeout, int howcompact, int memsave )
{
- int i, k, miniim, maxiim, minijm, maxijm;
+ int i, j, k;
+// int miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
- double eff1, eff0;
- static double *tmptmplen = NULL;
- static int *hist = NULL;
- static Achain *ac = NULL;
- double minscore;
- static char **tree;
- static char *treetmp;
+// double tmpdouble;
+// double eff1, eff0;
+ double *tmptmplen = NULL; //static?
+ int *hist = NULL; //static?
+ Bchain *ac = NULL; //static?
int im = -1, jm = -1;
- int prevnode, acjmnext, acjmprev;
+ Bchain *acjmnext, *acjmprev;
+ int prevnode;
+ Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
+ int *nmemar; //static?
+ int nmemim, nmemjm;
+ double minscore;
+ char **tree; //static?
+ char *treetmp; //static?
+ char *nametmp, *nameptr, *tmpptr; //static?
FILE *fp;
- int node[2];
- float lenfl[2];
-
- fp = fopen( "_guidetree", "r" );
- if( !fp )
+ double (*clusterfuncpt[1])(double,double);
+ char namec;
+ int *singlettable1 = NULL;
+ int *singlettable2 = NULL;
+ double *newarr;
+ void *(*distarrfunc)( void * );
+ void *(*resetnearestfunc)( void * );
+ int numfilled;
+ int nthreadtree;
+ compactdistarrthread_arg_t *distarrarg;
+ resetnearestthread_arg_t *resetarg;
+ int *joblist, nactive, posshared;
+ double *result;
+
+
+ sueff1 = 1 - (double)sueff_global;
+ sueff05 = (double)sueff_global * 0.5;
+ if ( treemethod == 'X' )
+ clusterfuncpt[0] = cluster_mix_double;
+ else
{
- fprintf( stderr, "cannot open _guidetree\n" );
+ reporterr( "Unknown treemethod, %c\n", treemethod );
exit( 1 );
}
+ if( howcompact == 2 )
+ {
+ if( seq )
+ {
+// distarrfunc = verycompactmsadistarrthread;
+ distarrfunc = verycompactmsadistarrthreadjoblist;
+ resetnearestfunc = NULL;
+ }
+ else
+ {
+// distarrfunc = verycompactkmerdistarrthread;
+ distarrfunc = verycompactkmerdistarrthreadjoblist;
+ resetnearestfunc = NULL;
+ }
+ }
+ else
+ {
+ if( seq )
+ {
+ distarrfunc = msadistarrthreadjoblist;
+ resetnearestfunc = msaresetnearestthread;
+ }
+ else
+ {
+ distarrfunc = kmerdistarrthreadjoblist;
+ resetnearestfunc = kmerresetnearestthread;
+ }
+ }
+ distarrarg = calloc( MAX( nthread, 1 ), sizeof( compactdistarrthread_arg_t ) );
+ resetarg = calloc( MAX( nthread, 1 ), sizeof( resetnearestthread_arg_t ) );
+ joblist = calloc( njob, sizeof( int ) );
+ if( howcompact != 2 ) result = calloc( njob, sizeof( double ) );
+ else result = NULL;
+
if( !hist )
{
- treetmp = AllocateCharVec( njob*50 );
- tree = AllocateCharMtx( njob, njob*50 );
hist = AllocateIntVec( njob );
- tmptmplen = (double *)malloc( njob * sizeof( double ) );
- ac = (Achain *)malloc( njob * sizeof( Achain ) );
+ tmptmplen = AllocateFloatVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ if( treeout )
+ {
+ treetmp = NULL; // kentou 2013/06/12
+ nametmp = AllocateCharVec( 1000 ); // nagasugi
+ tree = AllocateCharMtx( njob, 0 );
+ }
}
- for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
+ if( treeout )
+ {
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
+ {
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
+
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // suuji no bun de +100
+ if( tree[i] == NULL )
+ {
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
+ }
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
+ }
+ }
+
for( i=0; i<nseq; i++ )
{
- ac[i].next = i+1;
- ac[i].prev = i-1;
-// ac[i].curr = i;
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
}
- ac[nseq-1].next = -1;
+ ac[nseq-1].next = NULL;
+
+// for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
- for( i=0; i<nseq; i++ ) hist[i] = -1;
+ for( i=0; i<nseq; i++ )
+ {
+ hist[i] = -1;
+ nmemar[i] = 1;
+ }
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
+ for( i=0,numfilled=0; i<nseq; i++ ) if( partmtx[i] ) numfilled++;
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ )
+ {
+ if( k % 100 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
-#if 0
- minscore = 99999.9;
- for( i=0; ac[i].next!=-1; i=ac[i].next )
+// for( i=0,j=0; i<nseq; i++ ) if( partmtx[i] ) j++;
+// if( k% 100 == 0 ) reporterr( "numfilled=%d, filledinpartmtx=%d, numempty=%d\n", numfilled, j, nseq-k-numfilled );
+
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
- for( j=ac[i].next; j!=-1; j=ac[j].next )
- {
- tmpdouble = eff[i][j];
- if( tmpdouble < minscore )
- {
- minscore = tmpdouble;
- im = i; jm = j;
- }
+ i = acpti->pos;
+// printf( "k=%d i=%d, mindist[i]=%f\n", k, i, mindist[i] );
+ if( mindist[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindist[i];
}
}
-#else
- lenfl[0] = lenfl[1] = -1.0;
- loadtreeoneline( node, lenfl, fp );
- im = node[0];
- jm = node[1];
- minscore = eff[im][jm];
-
-// fprintf( stderr, "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
+// printf( "minscore=%f\n", minscore );
+ jm = nearest[im];
+// printf( "im=%d\n", im );
+// printf( "jm=%d\n", jm );
- if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
+ if( jm < im )
{
- fprintf( stderr, "\n\nWARNING: Branch length is not given.\n" );
- exit( 1 );
+ j=jm; jm=im; im=j;
}
- if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
- if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
-#endif
-
-// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
+ if( partmtx[im] == NULL && howcompact != 2 ) numfilled++;
+ if( partmtx[jm] != NULL ) numfilled--;
- intpt = topol[k][0];
prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
+ nmemim = nmemar[im];
+ if( memsave )
+ intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave
+ else
+ intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); // memsave
if( prevnode == -1 )
{
*intpt++ = im;
if( *pt1 > *pt2 )
{
pt11 = pt2;
- pt22 = pt1;
+// pt22 = pt1;
}
else
{
pt11 = pt1;
- pt22 = pt2;
+// pt22 = pt2;
+ }
+ if( memsave )
+ {
+ *intpt++ = *pt11;
+ *intpt = -1;
+ }
+ else
+ {
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
}
- intpt = topol[k][1];
prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
+ nmemjm = nmemar[jm];
+ if( memsave )
+ intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave
+ else
+ intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) ); // memsave
+ if( !intpt )
+ {
+ reporterr( "Cannot reallocate topol\n" );
+ exit( 1 );
+ }
if( prevnode == -1 )
{
*intpt++ = jm;
if( *pt1 > *pt2 )
{
pt11 = pt2;
- pt22 = pt1;
+// pt22 = pt1;
}
else
{
pt11 = pt1;
- pt22 = pt2;
+// pt22 = pt2;
+ }
+ if( memsave )
+ {
+ *intpt++ = *pt11;
+ *intpt = -1;
+ }
+ else
+ {
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
}
minscore *= 0.5;
-#if 0
- len[k][0] = minscore - tmptmplen[im];
- len[k][1] = minscore - tmptmplen[jm];
-#else
- len[k][0] = lenfl[0];
- len[k][1] = lenfl[1];
-#endif
+// printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] );
- tmptmplen[im] = minscore;
+ len[k][0] = ( minscore - tmptmplen[im] );
+ len[k][1] = ( minscore - tmptmplen[jm] );
- hist[im] = k;
+ if( dep ) dep[k].distfromtip = minscore;
+// reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
- for( i=0; i!=-1; i=ac[i].next )
- {
- if( i != im && i != jm )
- {
- if( i < im )
- {
- miniim = i;
- maxiim = im;
- minijm = i;
- maxijm = jm;
- }
- else if( i < jm )
- {
- miniim = im;
- maxiim = i;
- minijm = i;
- maxijm = jm;
- }
- else
- {
- miniim = im;
- maxiim = i;
- minijm = jm;
- maxijm = i;
- }
- eff0 = eff[miniim][maxiim];
- eff1 = eff[minijm][maxijm];
- eff[miniim][maxiim] =
- MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
- ( eff0 + eff1 ) * 0.5 * SUEFF;
- }
- }
- acjmprev = ac[jm].prev;
- acjmnext = ac[jm].next;
- ac[acjmprev].next = acjmnext;
- if( acjmnext != -1 )
- ac[acjmnext].prev = acjmprev;
+ tmptmplen[im] = minscore;
- sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
- strcpy( tree[im], treetmp );
+ hist[im] = k;
+ nmemar[im] = nmemim + nmemjm;
+ mindist[im] = 999.9;
-#if 0
- fprintf( stdout, "STEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
- fprintf( stdout, "\n" );
-#endif
- }
- fclose( fp );
+ if( pointt ) // kmer
+ {
+ singlettable1 = (int *)calloc( tsize, sizeof( int ) );
+ singlettable2 = (int *)calloc( tsize, sizeof( int ) );
+ makecompositiontable_global( singlettable1, pointt[im] );
+ makecompositiontable_global( singlettable2, pointt[jm] );
+ }
+ newarr = calloc( nseq, sizeof( double ) );
- fp = fopen( "infile.tree", "w" );
- fprintf( fp, "%s\n", treetmp );
-// fprintf( fp, "by veryfastsupg_double_loadtree\n" );
- fclose( fp );
+// nthreadtree = MAX( 1, nthread );
+ nthreadtree = nthread;
-#if 1
- fprintf( stderr, "\n" );
- free( (void *)tmptmplen ); tmptmplen = NULL;
- free( hist ); hist = NULL;
- free( (char *)ac ); ac = NULL;
- FreeCharMtx( tree );
- free( treetmp );
-#endif
+ for( acpti=ac,nactive=0; acpti!=NULL; acpti=acpti->next ) joblist[nactive++] = acpti->pos; // sukoshi muda...
-}
-#if 0
-void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
-{
- int i, j, k, miniim, maxiim, minijm, maxijm;
- int *intpt, *intpt2;
- double tmpdouble;
- double eff1, eff0;
- static double *tmptmplen = NULL;
- static int *hist = NULL;
- static Achain *ac = NULL;
- double minscore;
- int im = -1, jm = -1;
- int prevnode, acjmnext, acjmprev;
- int *pt1, *pt2, *pt11, *pt22;
- if( !hist )
- {
- hist = AllocateIntVec( njob );
- tmptmplen = (double *)malloc( njob * sizeof( double ) );
- ac = (Achain *)malloc( njob * sizeof( Achain ) );
- }
+#ifdef enablemultithread
+ if( nthreadtree > 0 )
+ {
+ compactdistarrthread_arg_t *targ;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+
+ posshared = 0;
+// targ = calloc( nthreadtree, sizeof( compactdistarrthread_arg_t ) );
+ targ = distarrarg;
+ handle = calloc( nthreadtree, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+
+ if( k % 100 == 0 ) reporterr( " (%d threads, nactive=%d, nfilled=%d) \r", nthreadtree, nactive, numfilled );
+ for( i=0; i<nthreadtree; i++ )
+ {
+ targ[i].para = 1;
+ targ[i].njob = nactive;
+// targ[i].thread_no = i;
+ targ[i].im = im;
+ targ[i].jm = jm;
+ targ[i].tselfscore = tselfscore;
+ targ[i].nlen = nlen;
+ targ[i].seq = seq;
+ targ[i].skiptable = skiptable;
+ targ[i].pointt = pointt;
+ targ[i].table1 = singlettable1;
+ targ[i].table2 = singlettable2;
+ targ[i].joblist = joblist;
+ targ[i].posshared = &posshared;
+ targ[i].mindist = mindist;
+ targ[i].nearest = nearest;
+ targ[i].newarr = newarr;
+ targ[i].partmtx = partmtx;
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, distarrfunc, (void *)(targ+i) );
+ }
+
+ for( j=0; j<nthreadtree; j++ ) pthread_join( handle[j], NULL );
+ pthread_mutex_destroy( &mutex );
+ free( handle );
+// free( targ );
- for( i=0; i<nseq; i++ )
- {
- ac[i].next = i+1;
- ac[i].prev = i-1;
-// ac[i].curr = i;
- }
- ac[nseq-1].next = -1;
-
- for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
- for( i=0; i<nseq; i++ ) hist[i] = -1;
+#if 0
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next ) // antei sei no tame
+ {
+ i = acpti->pos;
+ if( i != im && i != jm )
+ {
+// if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // heiretsu demo ii.
+// if( newarr[i] < mindist[i] )
+// {
+// mindist[i] = newarr[i];
+// nearest[i] = im;
+// }
+ if( newarr[i] < mindist[im] )
+ {
+ mindist[im] = newarr[i];
+ nearest[im] = i;
+ }
+// if( nearest[i] == jm )
+// {
+// nearest[i] = im;
+// }
+ }
+ }
+#endif
+ }
+ else
+#endif
+ {
+ if( k % 100 == 0 ) reporterr( " (serial, nactive=%d, nfilled=%d) \r", nactive, numfilled );
+ compactdistarrthread_arg_t *targ;
+
+ posshared = 0;
+// targ = calloc( 1, sizeof( compactdistarrthread_arg_t ) );
+ targ = distarrarg;
+
+ for( i=0; i<1; i++ )
+ {
+ targ[i].para = 0;
+ targ[i].njob = nactive;
+// targ[i].thread_no = i;
+ targ[i].im = im;
+ targ[i].jm = jm;
+ targ[i].tselfscore = tselfscore;
+ targ[i].nlen = nlen;
+ targ[i].seq = seq;
+ targ[i].skiptable = skiptable;
+ targ[i].pointt = pointt;
+ targ[i].table1 = singlettable1;
+ targ[i].table2 = singlettable2;
+ targ[i].joblist = joblist;
+ targ[i].posshared = &posshared;
+ targ[i].mindist = mindist;
+ targ[i].nearest = nearest;
+ targ[i].newarr = newarr;
+ targ[i].partmtx = partmtx;
+
+ distarrfunc( targ+i );
+// pthread_create( handle, NULL, distarrfunc, (void *)(targ) );
+ }
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
+// free( targ );
+
+ }
- minscore = 99999.9;
- for( i=0; ac[i].next!=-1; i=ac[i].next )
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next ) // antei sei no tame
{
- for( j=ac[i].next; j!=-1; j=ac[j].next )
- {
- tmpdouble = eff[i][j];
- if( tmpdouble < minscore )
+ i = acpti->pos;
+ if( i != im && i != jm )
+ {
+// if( partmtx[i] ) partmtx[i][im] = partmtx[i][jm] = newarr[i]; // heiretsu demo ii.
+// if( newarr[i] < mindist[i] )
+// {
+// mindist[i] = newarr[i];
+// nearest[i] = im;
+// }
+ if( newarr[i] < mindist[im] )
{
- minscore = tmpdouble;
- im = i; jm = j;
+ mindist[im] = newarr[i];
+ nearest[im] = i;
}
+// if( nearest[i] == jm )
+// {
+// nearest[i] = im;
+// }
}
}
-// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
+// printf( "im=%d, jm=%d\n", im, jm );
+#if 0
+ printf( "matrix = \n" );
+ for( i=0; i<njob; i++ )
+ {
+ if( partmtx[i] ) for( j=0; j<njob; j++ ) printf( "%f ", partmtx[i][j] );
+ else printf( "nai" );
+ printf( "\n" );
+
+ }
+#endif
+// if( k%500 == 0 )
+// {
+// reporterr( "at step %d,", k );
+// use_getrusage();
+// }
- intpt = topol[k][0];
- prevnode = hist[im];
- if( prevnode == -1 )
+ if( partmtx[im] ) free( partmtx[im] ); partmtx[im] = NULL;
+ if( partmtx[jm] ) free( partmtx[jm] ); partmtx[jm] = NULL;
+ if( howcompact == 2 )
{
- *intpt++ = im;
- *intpt = -1;
+ free( newarr );
+ newarr = NULL;
}
else
{
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
+ partmtx[im] = newarr;
+ }
+
+
+ if( pointt )
+ {
+ free( singlettable1 );
+ free( singlettable2 );
+ singlettable1 = NULL;
+ singlettable2 = NULL;
+ }
+
+ if( treeout )
+ {
+ treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 de juubunn (:%7,:%7) %7 ha minus kamo
+ if( !treetmp )
{
- pt11 = pt2;
- pt22 = pt1;
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
}
- else
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
{
- pt11 = pt1;
- pt22 = pt2;
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
+ strcpy( tree[im], treetmp );
}
- intpt = topol[k][1];
- prevnode = hist[jm];
- if( prevnode == -1 )
+ acjmprev = ac[jm].prev;
+ acjmnext = ac[jm].next;
+ acjmprev->next = acjmnext;
+ if( acjmnext != NULL )
+ acjmnext->prev = acjmprev;
+
+#if 0 // muscle seems to miss this.
+// int nwork = 0;
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
- *intpt++ = jm;
- *intpt = -1;
+ i = acpti->pos;
+// printf( "reset nearest? i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindist[i] );
+ if( nearest[i] == im )
+ {
+// printf( "reset nearest, i=%d, k=%d\n", i, k );
+ if( partmtx[im][i] > mindist[i] )
+ {
+// nwork++;
+// printf( "go\n" );
+ if( pointt ) // kmer
+ {
+ singlettable1 = (int *)calloc( tsize, sizeof( int ) );
+ makecompositiontable_global( singlettable1, pointt[i] );
+ }
+ resetnearest( nseq, ac, partmtx, mindist+i, nearest+i, i, seq, skiptable, tselfscore, pointt, nlen, singlettable1 );
+ if( pointt ) free( singlettable1 ); singlettable1 = NULL;// kmer
+ if( pointt ) commonsextet_p( NULL, NULL );
+ }
+ }
}
- else
+// reporterr( "nwork = %d\n", nwork );
+#else
+
+ if( howcompact == 2 ) continue;
+
+#if 0
+ if( 0 && nthreadtree > 0 )
{
- pt1 = topol[prevnode][0];
- pt2 = topol[prevnode][1];
- if( *pt1 > *pt2 )
+ resetnearestthread_arg_t *targ;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+ Bchain *acshared;
+
+ acshared = ac;
+// targ = calloc( nthreadtree, sizeof( resetnearestthread_arg_t ) );
+ targ = resetarg;
+ handle = calloc( nthreadtree, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+
+ for( i=0; i<nthreadtree; i++ )
{
- pt11 = pt2;
- pt22 = pt1;
+ targ[i].para = 1;
+ targ[i].nseq = nseq;
+ targ[i].im = im;
+ targ[i].partmtx = partmtx;
+ targ[i].mindist = mindist;
+ targ[i].nearest = nearest;
+ targ[i].seq = seq;
+ targ[i].skiptable = skiptable;
+ targ[i].tselfscore = tselfscore;
+ targ[i].pointt = pointt;
+ targ[i].nlen = nlen;
+ targ[i].acpt = &acshared;
+ targ[i].ac = ac;
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, resetnearestfunc, (void *)(targ+i) );
}
- else
+
+ for( j=0; j<nthreadtree; j++ ) pthread_join( handle[j], NULL );
+ pthread_mutex_destroy( &mutex );
+ free( handle );
+// free( targ );
+ }
+ else
+#endif
+ {
+ Bchain *acshared;
+ acshared = ac;
+ resetnearestthread_arg_t *targ;
+// targ = calloc( 1, sizeof( resetnearestthread_arg_t ) );
+ targ = resetarg;
{
- pt11 = pt1;
- pt22 = pt2;
+ targ[0].para = 0;
+ targ[0].nseq = nseq;
+ targ[0].im = im;
+ targ[0].partmtx = partmtx;
+ targ[0].mindist = mindist;
+ targ[0].nearest = nearest;
+ targ[0].seq = seq;
+ targ[0].skiptable = skiptable;
+ targ[0].tselfscore = tselfscore;
+ targ[0].pointt = pointt;
+ targ[0].nlen = nlen;
+ targ[0].result = result;
+ targ[0].joblist = joblist;
+ targ[0].acpt = &acshared;
+ targ[0].ac = ac;
+
+ resetnearestfunc( targ );
}
- for( intpt2=pt11; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- for( intpt2=pt22; *intpt2!=-1; )
- *intpt++ = *intpt2++;
- *intpt = -1;
+// free( targ );
}
+#endif
- minscore *= 0.5;
- len[k][0] = minscore - tmptmplen[im];
- len[k][1] = minscore - tmptmplen[jm];
+#if 0
+ printf( "\nooSTEP-%03d:\n", k+1 );
+ printf( "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 );
+ printf( "\n" );
+ printf( "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 );
+ printf( "\n" );
+#endif
+ }
+ if( treeout )
+ {
+ fp = fopen( "infile.tree", "w" );
+ fprintf( fp, "%s\n", treetmp );
+ fclose( fp );
+ }
- tmptmplen[im] = minscore;
+ for( im=0; im<nseq; im++ ) // im wo ugokasu hituyouha nai.
+ {
+ if( partmtx[im] ) free( partmtx[im] ); partmtx[im] = NULL;
+ }
+// if( partmtx ) free( partmtx ); partmtx = NULL; // oya ga free
+ if( treeout )
+ {
+ free( tree[0] );
+ free( tree );
+ free( treetmp );
+ free( nametmp );
+ }
+ free( (void *)tmptmplen ); tmptmplen = NULL;
+ free( hist ); hist = NULL;
+ free( (char *)ac ); ac = NULL;
+ free( (void *)nmemar ); nmemar = NULL;
+ if( singlettable1 ) free( singlettable1 );
+ if( singlettable2 ) free( singlettable2 );
+ free( distarrarg );
+ free( resetarg );
+ free( joblist );
+ if( result ) free( result );
+}
- hist[im] = k;
+void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout_memsave( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int efffree )
+{
- for( i=0; i!=-1; i=ac[i].next )
- {
- if( i != im && i != jm )
- {
- if( i < im )
- {
- miniim = i;
- maxiim = im;
- minijm = i;
- maxijm = jm;
- }
- else if( i < jm )
- {
- miniim = im;
- maxiim = i;
- minijm = i;
- maxijm = jm;
- }
- else
- {
- miniim = im;
- maxiim = i;
- minijm = jm;
- maxijm = i;
- }
- eff0 = eff[miniim][maxiim];
- eff1 = eff[minijm][maxijm];
- eff[miniim][maxiim] =
- MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
- ( eff0 + eff1 ) * 0.5 * SUEFF;
- }
- }
- acjmprev = ac[jm].prev;
- acjmnext = ac[jm].next;
- ac[acjmprev].next = acjmnext;
- if( acjmnext != -1 )
- ac[acjmnext].prev = acjmprev;
-#if 0
- fprintf( stdout, "STEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
- fprintf( stdout, "\n" );
-#endif
- }
-#if 1
- fprintf( stderr, "\n" );
- free( (void *)tmptmplen ); tmptmplen = NULL;
- free( hist ); hist = NULL;
- free( (char *)ac ); ac = NULL;
-#endif
-}
-#endif
-
-void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name )
-{
- int i, j, k, miniim, maxiim, minijm, maxijm;
- int *intpt, *intpt2;
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt;
double tmpdouble;
double eff1, eff0;
- static double *tmptmplen = NULL;
- static int *hist = NULL;
- static Achain *ac = NULL;
+ double *tmptmplen = NULL; //static?
+ int *hist = NULL; //static?
+ Bchain *ac = NULL; //static?
+ int im = 1, jm = -1;
+ Bchain *acjmnext, *acjmprev;
+ int prevnode;
+ Bchain *acpti;
+ int *pt1, *pt2, *pt11;
+ int *nmemar; //static?
+ int nmemim, nmemjm;
double minscore;
- static char **tree;
- static char *treetmp;
- static char *nametmp;
- FILE *fpout;
- int im = -1, jm = -1;
- int prevnode, acjmnext, acjmprev;
- int *pt1, *pt2, *pt11, *pt22;
+ int *nearest = NULL; // by D.Mathog, a guess
+ double *mindisfrom = NULL; // by D.Mathog, a guess
+ char **tree; //static?
+ char *treetmp; //static?
+ char *nametmp, *nameptr, *tmpptr; //static?
+ FILE *fp;
double (*clusterfuncpt[1])(double,double);
+ char namec;
- sueff1_double = 1 - SUEFF;
- sueff05_double = SUEFF * 0.5;
+ sueff1 = 1 - (double)sueff_global;
+ sueff05 = (double)sueff_global * 0.5;
if ( treemethod == 'X' )
clusterfuncpt[0] = cluster_mix_double;
else if ( treemethod == 'E' )
clusterfuncpt[0] = cluster_minimum_double;
else
{
- fprintf( stderr, "Unknown treemethod, %c\n", treemethod );
+ reporterr( "Unknown treemethod, %c\n", treemethod );
exit( 1 );
}
if( !hist )
{
- treetmp = AllocateCharVec( njob*50 );
- tree = AllocateCharMtx( njob, njob*50 );
hist = AllocateIntVec( njob );
- tmptmplen = (double *)malloc( njob * sizeof( double ) );
- ac = (Achain *)malloc( njob * sizeof( Achain ) );
- nametmp = AllocateCharVec( 31 );
+ tmptmplen = AllocateFloatVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateFloatVec( njob );
+ nearest = AllocateIntVec( njob );
+// treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
+ treetmp = NULL; // under review 2013/06/12
+ nametmp = AllocateCharVec( 1000 ); // too long
+// tree = AllocateCharMtx( njob, njob*600 );
+ tree = AllocateCharMtx( njob, 0 );
}
-// for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
+
for( i=0; i<nseq; i++ )
{
- for( j=0; j<30; j++ ) nametmp[j] = 0;
- for( j=0; j<30; j++ )
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
{
- if( isalnum( name[i][j] ) )
- nametmp[j] = name[i][j];
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
else
nametmp[j] = '_';
}
- nametmp[30] = 0;
- sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 );
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because of the '=' -> '_' conversion
+
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 to leave room for the number
+ if( tree[i] == NULL )
+ {
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
+ }
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
}
-
for( i=0; i<nseq; i++ )
{
- ac[i].next = i+1;
- ac[i].prev = i-1;
-// ac[i].curr = i;
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
}
- ac[nseq-1].next = -1;
+ ac[nseq-1].next = NULL;
+
+ for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
+
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
- for( i=0; i<nseq; i++ ) hist[i] = -1;
+ for( i=0; i<nseq; i++ )
+ {
+ hist[i] = -1;
+ nmemar[i] = 1;
+ }
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ )
+ {
+ if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
- minscore = 99999.9;
- for( i=0; ac[i].next!=-1; i=ac[i].next )
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
- for( j=ac[i].next; j!=-1; j=ac[j].next )
- {
- tmpdouble = eff[i][j];
- if( tmpdouble < minscore )
- {
- minscore = tmpdouble;
- im = i; jm = j;
- }
+ i = acpti->pos;
+// printf( "k=%d i=%d, mindist[i]=%f\n", k, i, mindisfrom[i] );
+ if( mindisfrom[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindisfrom[i];
}
}
-// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
+// printf( "minscore=%f\n", minscore );
+ jm = nearest[im];
+// printf( "im=%d\n", im );
+// printf( "jm=%d\n", jm );
+ if( jm < im )
+ {
+ j=jm; jm=im; im=j;
+ }
+
- intpt = topol[k][0];
prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
+ nmemim = nmemar[im];
+ intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave
if( prevnode == -1 )
{
*intpt++ = im;
if( *pt1 > *pt2 )
{
pt11 = pt2;
- pt22 = pt1;
+// pt22 = pt1;
}
else
{
pt11 = pt1;
- pt22 = pt2;
+// pt22 = pt2;
}
+#if 1 // memsave
+ *intpt++ = *pt11;
+ *intpt = -1;
+#else
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
+#endif
}
- intpt = topol[k][1];
prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
+ nmemjm = nmemar[jm];
+ intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) ); // memsave
+ if( !intpt )
+ {
+ reporterr( "Cannot reallocate topol\n" );
+ exit( 1 );
+ }
if( prevnode == -1 )
{
*intpt++ = jm;
if( *pt1 > *pt2 )
{
pt11 = pt2;
- pt22 = pt1;
+// pt22 = pt1;
}
else
{
pt11 = pt1;
- pt22 = pt2;
+// pt22 = pt2;
}
+#if 1 // memsave
+ *intpt++ = *pt11;
+ *intpt = -1;
+#else
for( intpt2=pt11; *intpt2!=-1; )
*intpt++ = *intpt2++;
for( intpt2=pt22; *intpt2!=-1; )
*intpt++ = *intpt2++;
*intpt = -1;
+#endif
}
minscore *= 0.5;
- len[k][0] = minscore - tmptmplen[im];
- len[k][1] = minscore - tmptmplen[jm];
+// printf( "minscore = %f, tmptmplen[im] = %f, tmptmplen[jm] = %f\n", minscore, tmptmplen[im], tmptmplen[jm] );
+ len[k][0] = ( minscore - tmptmplen[im] );
+ len[k][1] = ( minscore - tmptmplen[jm] );
+
+ if( dep ) dep[k].distfromtip = minscore;
+// reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
tmptmplen[im] = minscore;
hist[im] = k;
+ nmemar[im] = nmemim + nmemjm;
- for( i=0; i!=-1; i=ac[i].next )
+ mindisfrom[im] = 999.9;
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
+ i = acpti->pos;
if( i != im && i != jm )
{
if( i < im )
minijm = jm;
maxijm = i;
}
- eff0 = eff[miniim][maxiim];
- eff1 = eff[minijm][maxijm];
+ eff0 = eff[miniim][maxiim-miniim];
+ eff1 = eff[minijm][maxijm-minijm];
#if 0
- eff[miniim][maxiim] =
- MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
- ( eff0 + eff1 ) * 0.5 * SUEFF;
+ tmpdouble = eff[miniim][maxiim-miniim] =
+ MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
#else
- eff[miniim][maxiim] =
+ tmpdouble = eff[miniim][maxiim-miniim] =
(clusterfuncpt[0])( eff0, eff1 );
+// printf( "tmpdouble=%f, eff0=%f, eff1=%f\n", tmpdouble, eff0, eff1 );
#endif
+ if( tmpdouble < mindisfrom[i] )
+ {
+ mindisfrom[i] = tmpdouble;
+ nearest[i] = im;
+ }
+ if( tmpdouble < mindisfrom[im] )
+ {
+ mindisfrom[im] = tmpdouble;
+ nearest[im] = i;
+ }
+ if( nearest[i] == jm )
+ {
+ nearest[i] = im;
+ }
}
}
- acjmprev = ac[jm].prev;
- acjmnext = ac[jm].next;
- ac[acjmprev].next = acjmnext;
- if( acjmnext != -1 )
- ac[acjmnext].prev = acjmprev;
+// printf( "im=%d, jm=%d\n", im, jm );
+#if 0
+ printf( "matrix = \n" );
+ for( i=0; i<njob; i++ )
+ {
+ for( j=0; j<njob; j++ )
+ {
+ if( i>j )
+ {
+ minijm=j;
+ maxijm=i;
+ }
+ else
+ {
+ minijm=i;
+ maxijm=j;
+ }
+ printf( "%f ", eff[minijm][maxijm-minijm] );
+ }
+ printf( "\n" );
+ }
+#endif
+ treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7), but %7 may be negative
+ if( !treetmp )
+ {
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
strcpy( tree[im], treetmp );
+
+ acjmprev = ac[jm].prev;
+ acjmnext = ac[jm].next;
+ acjmprev->next = acjmnext;
+ if( acjmnext != NULL )
+ acjmnext->prev = acjmprev;
+ if( efffree )
+ {
+ free( (void *)eff[jm] ); eff[jm] = NULL; // to be restored later
+ }
+
+#if 1 // muscle seems to miss this.
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+// printf( "reset nearest? i=%d, k=%d, nearest[i]=%d, im=%d, mindist=%f\n", i, k, nearest[i], im, mindisfrom[i] );
+ if( nearest[i] == im )
+ {
+// printf( "reset nearest, i=%d, k=%d\n", i, k );
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ }
+ if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
+ {
+// printf( "go\n" );
+ setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ }
+ }
+ }
+#else
+ reporterr( "CHUUI!\n" );
+#endif
+
+
#if 0
- fprintf( stdout, "STEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
- fprintf( stdout, "\n" );
+ printf( "\nooSTEP-%03d:\n", k+1 );
+ printf( "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 );
+ printf( "\n" );
+ printf( "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 );
+ printf( "\n" );
#endif
}
- fpout = fopen( "infile.tree", "w" );
- fprintf( fpout, "%s\n", treetmp );
-// fprintf( fpout, "by veryfastsupg_double_outtree\n" );
- fclose( fpout );
-#if 1
- fprintf( stderr, "\n" );
+ fp = fopen( "infile.tree", "w" );
+ fprintf( fp, "%s\n", treetmp );
+ fclose( fp );
+
+ free( tree[0] );
+ free( tree );
+ free( treetmp );
+ free( nametmp );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
- FreeCharMtx( tree );
- free( treetmp );
- free( nametmp );
-#endif
+ free( (void *)nmemar ); nmemar = NULL;
+ free( mindisfrom );
+ free( nearest );
}
-void veryfastsupg( int nseq, double **oeff, int ***topol, double **len )
+void fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( int nseq, double **eff, int ***topol, double **len, char **name, int *nlen, Treedep *dep, int efffree )
{
- int i, j, k, miniim, maxiim, minijm, maxijm;
+ int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
- int tmpint;
- int eff1, eff0;
- static double *tmptmplen = NULL;
- static int **eff = NULL;
- static int *hist = NULL;
- static Achain *ac = NULL;
- int minscore;
- double minscoref;
+ double tmpdouble;
+ double eff1, eff0;
+ double *tmptmplen = NULL; //static?
+ int *hist = NULL; //static?
+ Bchain *ac = NULL; //static?
int im = -1, jm = -1;
- int prevnode, acjmnext, acjmprev;
+ Bchain *acjmnext, *acjmprev;
+ int prevnode;
+ Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
- if( !eff )
+ int *nmemar; //static?
+ int nmemim, nmemjm;
+ double minscore;
+ int *nearest = NULL; // by D.Mathog, a guess
+ double *mindisfrom = NULL; // by D.Mathog, a guess
+ char **tree; //static?
+ char *treetmp; //static?
+ char *nametmp, *nameptr, *tmpptr; //static?
+ FILE *fp;
+ double (*clusterfuncpt[1])(double,double);
+ char namec;
+
+
+ sueff1 = 1 - (double)sueff_global;
+ sueff05 = (double)sueff_global * 0.5;
+ if ( treemethod == 'X' )
+ clusterfuncpt[0] = cluster_mix_double;
+ else if ( treemethod == 'E' )
+ clusterfuncpt[0] = cluster_average_double;
+ else if ( treemethod == 'q' )
+ clusterfuncpt[0] = cluster_minimum_double;
+ else
+ {
+ reporterr( "Unknown treemethod, %c\n", treemethod );
+ exit( 1 );
+ }
+
+ if( !hist )
{
- eff = AllocateIntMtx( njob, njob );
hist = AllocateIntVec( njob );
- tmptmplen = (double *)malloc( njob * sizeof( double ) );
- ac = (Achain *)malloc( njob * sizeof( Achain ) );
+ tmptmplen = AllocateFloatVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateFloatVec( njob );
+ nearest = AllocateIntVec( njob );
+// treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
+ treetmp = NULL; // under review 2013/06/12
+ nametmp = AllocateCharVec( 1000 ); // too long
+// tree = AllocateCharMtx( njob, njob*600 );
+ tree = AllocateCharMtx( njob, 0 );
}
+
- for( i=0; i<nseq; i++ )
+ for( i=0; i<nseq; i++ )
{
- for( j=0; j<nseq; j++ )
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
{
- eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 );
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
}
- }
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because of the '=' -> '_' conversion
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 to leave room for the number
+ if( tree[i] == NULL )
+ {
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
+ }
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
+ }
for( i=0; i<nseq; i++ )
{
- ac[i].next = i+1;
- ac[i].prev = i-1;
-// ac[i].curr = i;
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
}
- ac[nseq-1].next = -1;
+ ac[nseq-1].next = NULL;
+
+ for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
- for( i=0; i<nseq; i++ ) hist[i] = -1;
+ for( i=0; i<nseq; i++ )
+ {
+ hist[i] = -1;
+ nmemar[i] = 1;
+ }
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ )
+ {
+ if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
- minscore = INTMTXSCALE*4;
- for( i=0; ac[i].next!=-1; i=ac[i].next )
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
- for( j=ac[i].next; j!=-1; j=ac[j].next )
- {
- tmpint = eff[i][j];
- if( tmpint < minscore )
- {
- minscore = tmpint;
- im = i; jm = j;
- }
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ if( mindisfrom[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindisfrom[i];
}
}
- minscoref = (double)minscore * 0.5 / ( INTMTXSCALE );
+ jm = nearest[im];
+ if( jm < im )
+ {
+ j=jm; jm=im; im=j;
+ }
-// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
-#if 1
- intpt = topol[k][0];
prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
+ nmemim = nmemar[im];
+ intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
- intpt = topol[k][1];
prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
+ nmemjm = nmemar[jm];
+ intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
+ if( !intpt )
+ {
+ reporterr( "Cannot reallocate topol\n" );
+ exit( 1 );
+ }
if( prevnode == -1 )
{
*intpt++ = jm;
*intpt++ = *intpt2++;
*intpt = -1;
}
-#else
- intpt = topol[k][0];
- for( i=0; i<nseq; i++ )
- if( pair[im][i] > -2 )
- *intpt++ = i;
- *intpt = -1;
- intpt = topol[k][1];
- for( i=0; i<nseq; i++ )
- if( pair[jm][i] > -2 )
- *intpt++ = i;
- *intpt = -1;
-#endif
+ minscore *= 0.5;
- len[k][0] = minscoref - tmptmplen[im];
- len[k][1] = minscoref - tmptmplen[jm];
+ len[k][0] = ( minscore - tmptmplen[im] );
+ len[k][1] = ( minscore - tmptmplen[jm] );
- tmptmplen[im] = minscoref;
+ if( dep ) dep[k].distfromtip = minscore;
+// reporterr( "\n##### dep[%d].distfromtip = %f\n", k, minscore );
+
+ tmptmplen[im] = minscore;
hist[im] = k;
+ nmemar[im] = nmemim + nmemjm;
- for( i=0; i!=-1; i=ac[i].next )
+ mindisfrom[im] = 999.9;
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
+ i = acpti->pos;
if( i != im && i != jm )
{
if( i < im )
minijm = jm;
maxijm = i;
}
- eff0 = eff[miniim][maxiim];
- eff1 = eff[minijm][maxijm];
- eff[miniim][maxiim] =
- MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
- ( eff0 + eff1 ) * 0.5 * SUEFF;
+ eff0 = eff[miniim][maxiim-miniim];
+ eff1 = eff[minijm][maxijm-minijm];
+#if 0
+ tmpdouble = eff[miniim][maxiim-miniim] =
+ MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
+#else
+ tmpdouble = eff[miniim][maxiim-miniim] =
+ (clusterfuncpt[0])( eff0, eff1 );
+
+
+#endif
+ if( tmpdouble < mindisfrom[i] )
+ {
+ mindisfrom[i] = tmpdouble;
+ nearest[i] = im;
+ }
+ if( tmpdouble < mindisfrom[im] )
+ {
+ mindisfrom[im] = tmpdouble;
+ nearest[im] = i;
+ }
+ if( nearest[i] == jm )
+ {
+ nearest[i] = im;
+ }
}
}
+
+ treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7), but %7 may be negative
+ if( !treetmp )
+ {
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
+ strcpy( tree[im], treetmp );
+
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
- ac[acjmprev].next = acjmnext;
- if( acjmnext != -1 )
- ac[acjmnext].prev = acjmprev;
+ acjmprev->next = acjmnext;
+ if( acjmnext != NULL )
+ acjmnext->prev = acjmprev;
+ if( efffree )
+ {
+ free( (void *)eff[jm] ); eff[jm] = NULL;
+ }
+
+#if 1 // muscle seems to miss this.
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+ if( nearest[i] == im )
+ {
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ }
+ if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
+ setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ }
+ }
+#else
+ reporterr( "chuui!\n" );
+#endif
+
+
#if 0
- fprintf( stdout, "STEP-%03d:\n", k+1 );
- fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
- fprintf( stdout, "\n" );
- fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
- fprintf( stdout, "\n" );
+ printf( "\nooSTEP-%03d:\n", k+1 );
+ printf( "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i]+1 );
+ printf( "\n" );
+ printf( "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i]+1 );
+ printf( "\n" );
#endif
}
-#if 1
- FreeIntMtx( eff ); eff = NULL;
+ fp = fopen( "infile.tree", "w" );
+ fprintf( fp, "%s\n", treetmp );
+ fclose( fp );
+
+ free( tree[0] );
+ free( tree );
+ free( treetmp );
+ free( nametmp );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
-#endif
+ free( (void *)nmemar ); nmemar = NULL;
+ free( mindisfrom );
+ free( nearest );
}
-void veryfastsupg_int( int nseq, int **oeff, int ***topol, double **len )
-/* len\e$B$O!"\e(B oeff\e$B$,@0?t!#\e(Blen\e$B$b<B$O@0?t!#\e(B
- \e$BI,MW$K1~$8$F3d$C$F;H$&!#\e(B */
+
+void fixed_musclesupg_double_treeout( int nseq, double **eff, int ***topol, double **len, char **name )
{
- int i, j, k, miniim, maxiim, minijm, maxijm;
+ int i, j, k, miniim, maxiim, minijm, maxijm;
int *intpt, *intpt2;
- int tmpint;
- int eff1, eff0;
- static int *tmptmplen = NULL;
- static int **eff = NULL;
- static int *hist = NULL;
- static Achain *ac = NULL;
- int minscore;
+ double tmpdouble;
+ double eff1, eff0;
+ static double *tmptmplen = NULL;
+ static int *hist = NULL;
+ static Bchain *ac = NULL;
int im = -1, jm = -1;
- int prevnode, acjmnext, acjmprev;
+ Bchain *acjmnext, *acjmprev;
+ int prevnode;
+ Bchain *acpti;
int *pt1, *pt2, *pt11, *pt22;
+ static int *nmemar;
+ int nmemim, nmemjm;
+ double minscore;
+ int *nearest = NULL; // by D.Mathog, a guess
+ double *mindisfrom = NULL; // by D.Mathog, a guess
+ static char **tree;
+ static char *treetmp;
+ static char *nametmp, *nameptr, *tmpptr;
+ FILE *fp;
+ double (*clusterfuncpt[1])(double,double);
+ char namec;
- if( !eff )
+ sueff1 = 1.0 - sueff_global;
+ sueff05 = sueff_global * 0.5;
+ if ( treemethod == 'X' )
+ clusterfuncpt[0] = cluster_mix_double;
+ else if ( treemethod == 'E' )
+ clusterfuncpt[0] = cluster_average_double;
+ else if ( treemethod == 'q' )
+ clusterfuncpt[0] = cluster_minimum_double;
+ else
+ {
+ reporterr( "Unknown treemethod, %c\n", treemethod );
+ exit( 1 );
+ }
+
+
+
+
+
+#if 0
+ if( !hist )
{
- eff = AllocateIntMtx( njob, njob );
hist = AllocateIntVec( njob );
- tmptmplen = AllocateIntVec( njob );
- ac = (Achain *)malloc( njob * sizeof( Achain ) );
+ tmptmplen = AllocateDoubleVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateDoubleVec( njob );
+ nearest = AllocateIntVec( njob );
+ treetmp = AllocateCharVec( njob*150 );
+ nametmp = AllocateCharVec( 91 );
+ tree = AllocateCharMtx( njob, njob*150 );
+ }
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<90; j++ ) nametmp[j] = 0;
+ for( j=0; j<90; j++ )
+ {
+ if( name[i][j] == 0 )
+ break;
+ else if( isalnum( name[i][j] ) )
+ nametmp[j] = name[i][j];
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[90] = 0;
+// sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because of the '=' -> '_' conversion
+
+ sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr );
+ }
+
+#else
+
+ if( !hist )
+ {
+ hist = AllocateIntVec( njob );
+ tmptmplen = AllocateDoubleVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateDoubleVec( njob );
+ nearest = AllocateIntVec( njob );
+// treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
+ treetmp = NULL; // under review 2013/06/12
+ nametmp = AllocateCharVec( 1000 ); // too long
+// tree = AllocateCharMtx( njob, njob*600 );
+ tree = AllocateCharMtx( njob, 0 );
}
+
- for( i=0; i<nseq; i++ )
+ for( i=0; i<nseq; i++ )
{
- for( j=0; j<nseq; j++ )
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
+ {
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because of the '=' -> '_' conversion
+
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 to leave room for the number
+ if( tree[i] == NULL )
{
- eff[i][j] = ( oeff[i][j] );
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
}
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
}
+#endif
+
+
+
+
+
+
+
+
for( i=0; i<nseq; i++ )
{
- ac[i].next = i+1;
- ac[i].prev = i-1;
-// ac[i].curr = i;
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
}
- ac[nseq-1].next = -1;
+ ac[nseq-1].next = NULL;
- for( i=0; i<nseq; i++ ) tmptmplen[i] = 0;
- for( i=0; i<nseq; i++ ) hist[i] = -1;
+ for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
+ for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ )
+ {
+ hist[i] = -1;
+ nmemar[i] = 1;
+ }
- minscore = INTMTXSCALE*4;
- for( i=0; ac[i].next!=-1; i=ac[i].next )
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ )
+ {
+ if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
+
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
- for( j=ac[i].next; j!=-1; j=ac[j].next )
- {
- tmpint = eff[i][j];
- if( tmpint < minscore )
- {
- minscore = tmpint;
- im = i; jm = j;
- }
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ if( mindisfrom[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindisfrom[i];
}
}
+ jm = nearest[im];
+ if( jm < im )
+ {
+ j=jm; jm=im; im=j;
+ }
-// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
- intpt = topol[k][0];
prevnode = hist[im];
+ nmemim = nmemar[im];
+// intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
+ intpt = topol[k][0];
if( prevnode == -1 )
{
*intpt++ = im;
*intpt = -1;
}
- intpt = topol[k][1];
prevnode = hist[jm];
+ nmemjm = nmemar[jm];
+// intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
+ intpt = topol[k][1];
if( prevnode == -1 )
{
*intpt++ = jm;
minscore *= 0.5;
- len[k][0] = (double)( minscore - tmptmplen[im] );
- len[k][1] = (double)( minscore - tmptmplen[jm] );
-
- tmptmplen[im] = minscore;
+ len[k][0] = ( minscore - tmptmplen[im] );
+ len[k][1] = ( minscore - tmptmplen[jm] );
-#if 0
- free( tmptmplen );
- tmptmplen = AllocateIntVec( nseq );
-#endif
+ tmptmplen[im] = minscore;
hist[im] = k;
+ nmemar[im] = nmemim + nmemjm;
- for( i=0; i!=-1; i=ac[i].next )
+ mindisfrom[im] = 999.9;
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
+ i = acpti->pos;
if( i != im && i != jm )
{
if( i < im )
}
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
- eff[miniim][maxiim] =
- (int) ( (float)MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) + (float)( eff0 + eff1 ) * 0.5 * SUEFF );
+#if 0
+ tmpdouble = eff[miniim][maxiim] =
+ MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
+#else
+ tmpdouble = eff[miniim][maxiim] =
+ (clusterfuncpt[0])( eff0, eff1 );
+#endif
+ if( tmpdouble < mindisfrom[i] )
+ {
+ mindisfrom[i] = tmpdouble;
+ nearest[i] = im;
+ }
+ if( tmpdouble < mindisfrom[im] )
+ {
+ mindisfrom[im] = tmpdouble;
+ nearest[im] = i;
+ }
+ if( nearest[i] == jm )
+ {
+ nearest[i] = im;
+ }
}
}
+#if 0
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ strcpy( tree[im], treetmp );
+#else
+ treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7), but %7 may be negative
+ if( !treetmp )
+ {
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
+ strcpy( tree[im], treetmp );
+#endif
+
acjmprev = ac[jm].prev;
acjmnext = ac[jm].next;
- ac[acjmprev].next = acjmnext;
- if( acjmnext != -1 )
- ac[acjmnext].prev = acjmprev;
+ acjmprev->next = acjmnext;
+ if( acjmnext != NULL )
+ acjmnext->prev = acjmprev;
+// free( (void *)eff[jm] ); eff[jm] = NULL;
+
+#if 1 // muscle seems to miss this.
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+ if( nearest[i] == im )
+ {
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ }
+ if( eff[miniim][maxiim] > mindisfrom[i] )
+ setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ }
+ }
+#endif
+
+
#if 0
- fprintf( stdout, "STEP-%03d:\n", k+1 );
+ fprintf( stdout, "\nvSTEP-%03d:\n", k+1 );
fprintf( stdout, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
fprintf( stdout, "\n" );
fprintf( stdout, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
fprintf( stdout, "\n" );
#endif
}
- FreeIntMtx( eff ); eff = NULL;
+ fp = fopen( "infile.tree", "w" );
+ fprintf( fp, "%s\n", treetmp );
+ fclose( fp );
+#if 0
+ FreeCharMtx( tree );
+#else
+ free( tree[0] );
+ free( tree );
+#endif
+ free( treetmp );
+ free( nametmp );
free( (void *)tmptmplen ); tmptmplen = NULL;
free( hist ); hist = NULL;
free( (char *)ac ); ac = NULL;
+ free( (void *)nmemar ); nmemar = NULL;
+ free( mindisfrom );
+ free( nearest );
}
-void fastsupg( int nseq, double **oeff, int ***topol, double **len )
+
+void fixed_supg_double_treeout_constrained( int nseq, double **eff, int ***topol, double **len, char **name, int ngroup, int **groups )
{
- int i, j, k, miniim, maxiim, minijm, maxijm;
-#if 0
- double eff[nseq][nseq];
- char pair[njob][njob];
-#else
- static float *tmplen;
- int *intpt;
- float tmpfloat;
- float eff1, eff0;
- static float **eff = NULL;
- static char **pair = NULL;
- static Achain *ac;
- float minscore;
- int im = -1, jm = -1;
- if( !eff )
- {
- eff = AllocateFloatMtx( njob, njob );
- pair = AllocateCharMtx( njob, njob );
- tmplen = AllocateFloatVec( njob );
- ac = (Achain *)calloc( njob, sizeof( Achain ) );
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt, *intpt2;
+ double tmpdouble;
+ double eff1, eff0;
+ static double *tmptmplen = NULL;
+ static int *hist = NULL;
+ static Bchain *ac = NULL;
+ int im = -1, jm = -1;
+ Bchain *acjmnext, *acjmprev;
+ int prevnode;
+ Bchain *acpti, *acptj;
+ int *pt1, *pt2, *pt11, *pt22;
+ static int *nmemar;
+ int nmemim, nmemjm;
+ double minscore;
+ int *nearest = NULL; // by D.Mathog, a guess
+ double *mindisfrom = NULL; // by D.Mathog, a guess
+ static char **tree;
+ static char *treetmp;
+ static char *nametmp, *nameptr, *tmpptr;
+ FILE *fp;
+ double (*clusterfuncpt[1])(double,double);
+ char namec;
+ int *testtopol, **inconsistent;
+ int **inconsistentpairlist;
+ int ninconsistentpairs;
+ int *warned;
+ int allinconsistent;
+ int firsttime;
+
+ increaseintergroupdistancesfullmtx( eff, ngroup, groups, nseq );
+
+ sueff1 = 1 - sueff_global;
+ sueff05 = sueff_global * 0.5;
+ if ( treemethod == 'X' )
+ clusterfuncpt[0] = cluster_mix_double;
+ else if ( treemethod == 'E' )
+ clusterfuncpt[0] = cluster_average_double;
+ else if ( treemethod == 'q' )
+ clusterfuncpt[0] = cluster_minimum_double;
+ else
+ {
+ reporterr( "Unknown treemethod, %c\n", treemethod );
+ exit( 1 );
}
-#endif
-
- for( i=0; i<nseq; i++ )
+
+
+
+
+
+#if 0
+ if( !hist )
{
- for( j=0; j<nseq; j++ )
+ hist = AllocateIntVec( njob );
+ tmptmplen = AllocateDoubleVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateDoubleVec( njob );
+ nearest = AllocateIntVec( njob );
+ treetmp = AllocateCharVec( njob*150 );
+ nametmp = AllocateCharVec( 91 );
+ tree = AllocateCharMtx( njob, njob*150 );
+ }
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<90; j++ ) nametmp[j] = 0;
+ for( j=0; j<90; j++ )
{
- eff[i][j] = (float)oeff[i][j];
+ if( name[i][j] == 0 )
+ break;
+ else if( isalnum( name[i][j] ) )
+ nametmp[j] = name[i][j];
+ else
+ nametmp[j] = '_';
}
+ nametmp[90] = 0;
+// sprintf( tree[i], "%d_%.60s", i+1, nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
+
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // = -> _ no tame
+
+ sprintf( tree[i], "\n%d_%.60s\n", i+1, nameptr );
}
- for( i=0; i<nseq; i++ )
+#else
+
+ if( !hist )
{
- ac[i].next = i+1;
- ac[i].prev = i-1;
-// ac[i].curr = i;
+ hist = AllocateIntVec( njob );
+ tmptmplen = AllocateDoubleVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateDoubleVec( njob );
+ nearest = AllocateIntVec( njob );
+// treetmp = AllocateCharVec( njob * ( B + 100 ) ); // too long?
+ treetmp = NULL; // under consideration 2013/06/12
+ nametmp = AllocateCharVec( 1000 ); // too long
+// tree = AllocateCharMtx( njob, njob*600 );
+ tree = AllocateCharMtx( njob, 0 );
+ testtopol = AllocateIntVec( njob + 1 );
+ inconsistent = AllocateIntMtx( njob, njob ); // wasteful
+// inconsistentpairlist = AllocateIntMtx( njob*(njob-1)/2+1, 2 ); // wasteful
+ inconsistentpairlist = AllocateIntMtx( 1, 2 );
+ warned = AllocateIntVec( ngroup );
}
- ac[nseq-1].next = -1;
- for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
- for( i=0; i<nseq; i++ ) pair[i][i] = 1;
+
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
+ {
+ namec = name[i][j];
+ if( namec == 0 )
+ break;
+ else if( isalnum( namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' )
+ nametmp[j] = namec;
+ else
+ nametmp[j] = '_';
+ }
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+ if( outnumber )
+ nameptr = strstr( nametmp, "_numo_e" ) + 8;
+ else
+ nameptr = nametmp + 1;
- fprintf( stderr, "\n" );
- for( k=0; k<nseq-1; k++ )
- {
- if( k % 10 == 0 ) fprintf( stderr, "%d / %d\r", k, nseq );
+ if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // because of the = -> _ conversion
- minscore = 9999.0;
- for( i=0; ac[i].next!=-1; i=ac[i].next )
-// for( i=0; i<nseq-1; i++ )
+ tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 to leave room for the number
+ if( tree[i] == NULL )
{
- for( j=ac[i].next; j!=-1; j=ac[j].next )
-// for( j=i+1; j<nseq; j++ )
- {
- tmpfloat = eff[i][j];
- if( tmpfloat < minscore )
- {
- minscore = tmpfloat;
- im = i; jm = j;
- }
- }
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
}
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
+ }
-// fprintf( stderr, "im=%d, jm=%d\n", im, jm );
+#endif
- intpt = topol[k][0];
- for( i=0; i<nseq; i++ )
- if( pair[im][i] > 0 )
- *intpt++ = i;
- *intpt = -1;
- intpt = topol[k][1];
- for( i=0; i<nseq; i++ )
- if( pair[jm][i] > 0 )
- *intpt++ = i;
- *intpt = -1;
- minscore /= 2.0;
- len[k][0] = (double)minscore - tmplen[im];
- len[k][1] = (double)minscore - tmplen[jm];
- tmplen[im] = (double)minscore;
- for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
- for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
-// for( i=0; i<nseq; i++ )
- for( i=0; i!=-1; i=ac[i].next )
- {
- if( i != im && i != jm )
- {
- if( i < im )
- {
- miniim = i;
- maxiim = im;
- minijm = i;
- maxijm = jm;
- }
- else if( i < jm )
- {
- miniim = im;
- maxiim = i;
- minijm = i;
- maxijm = jm;
- }
- else
- {
- miniim = im;
- maxiim = i;
- minijm = jm;
- maxijm = i;
- }
- eff0 = eff[miniim][maxiim];
- eff1 = eff[minijm][maxijm];
- eff[miniim][maxiim] =
- MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
- ( eff0 + eff1 ) * 0.5 * SUEFF;
-// eff[minijm][maxijm] = 9999.0;
- }
- }
- ac[ac[jm].prev].next = ac[jm].next;
- ac[ac[jm].next].prev = ac[jm].prev;
-// eff[im][jm] = 9999.0;
-#if 0
- fprintf( stderr, "STEP-%03d:\n", k+1 );
- fprintf( stderr, "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][0][i] );
- fprintf( stderr, "\n" );
- fprintf( stderr, "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stderr, " %03d", topol[k][1][i] );
- fprintf( stderr, "\n" );
-#endif
- }
- fprintf( stderr, "\n" );
-// FreeFloatMtx( eff );
-// FreeCharMtx( pair );
-// FreeFloatVec( tmplen );
-// free( ac );
-}
-void supg( int nseq, double **oeff, int ***topol, double **len )
-{
- int i, j, k, miniim, maxiim, minijm, maxijm;
-#if 0
- double eff[nseq][nseq];
- char pair[njob][njob];
-#else
- static float *tmplen;
- int *intpt;
- float **floatptpt;
- float *floatpt;
- float tmpfloat;
- float eff1, eff0;
- static float **eff = NULL;
- static char **pair = NULL;
- if( !eff )
+ for( i=0; i<nseq; i++ )
{
- eff = AllocateFloatMtx( njob, njob );
- pair = AllocateCharMtx( njob, njob );
- tmplen = AllocateFloatVec( njob );
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
}
-#endif
+ ac[nseq-1].next = NULL;
-
+ for( i=0; i<nseq; i++ ) setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
+
+ for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ )
{
- for( j=0; j<nseq; j++ )
- {
- eff[i][j] = (float)oeff[i][j];
- }
+ hist[i] = -1;
+ nmemar[i] = 1;
}
- for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
- for( i=0; i<nseq; i++ ) pair[i][i] = 1;
- for( k=0; k<nseq-1; k++ )
- {
- float minscore = 9999.0;
- int im = -1, jm = -1;
+ reporterr( "\n" );
+ ninconsistentpairs = 0;
+ for( k=0; k<nseq-1; k++ )
+ {
+ if( k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
- floatptpt = eff;
- for( i=0; i<nseq-1; i++ )
+
+// for( acpti=ac; acpti->next!=NULL; acpti=acpti->next ) for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next ) inconsistent[acpti->pos][acptj->pos] = 0;
+ for( i=0; i<ninconsistentpairs; i++ ) inconsistent[inconsistentpairlist[i][0]][inconsistentpairlist[i][1]] = 0;
+ ninconsistentpairs = 0;
+ firsttime = 1;
+ while( 1 )
{
- floatpt = *floatptpt++ + i + 1;
- for( j=i+1; j<nseq; j++ )
- {
- tmpfloat = *floatpt++;
- if( tmpfloat < minscore )
+ if( firsttime )
+ {
+ firsttime = 0;
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
- minscore = tmpfloat;
- im = i; jm = j;
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ if( mindisfrom[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindisfrom[i];
+ }
+ }
+ jm = nearest[im];
+ if( jm < im )
+ {
+ j=jm; jm=im; im=j;
+ }
+ }
+ else
+ {
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
+ {
+ j = acptj->pos;
+ if( !inconsistent[i][j] && (tmpdouble=eff[i][j]) < minscore )
+ {
+ minscore = tmpdouble;
+ im = i; jm = j;
+ }
+ }
+ for( acptj=ac; (acptj&&acptj->pos!=i); acptj=acptj->next )
+ {
+ j = acptj->pos;
+ if( !inconsistent[j][i] && (tmpdouble=eff[j][i]) < minscore )
+ {
+ minscore = tmpdouble;
+ im = j; jm = i;
+ }
+ }
}
}
- }
- intpt = topol[k][0];
- for( i=0; i<nseq; i++ )
- if( pair[im][i] > 0 )
- *intpt++ = i;
- *intpt = -1;
- intpt = topol[k][1];
- for( i=0; i<nseq; i++ )
- if( pair[jm][i] > 0 )
- *intpt++ = i;
- *intpt = -1;
+ allinconsistent = 1;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
+ {
+ for( acptj=acpti->next; acptj!=NULL; acptj=acptj->next )
+ {
+ if( inconsistent[acpti->pos][acptj->pos] == 0 )
+ {
+ allinconsistent = 0;
+ goto exitloop_d;
+ }
+ }
+ }
+ exitloop_d:
- len[k][0] = (double)minscore / 2.0 - tmplen[im];
- len[k][1] = (double)minscore / 2.0 - tmplen[jm];
+ if( allinconsistent )
+ {
+ reporterr( "\n\n\nPlease check whether the grouping is possible.\n\n\n" );
+ exit( 1 );
+ }
+#if 1
+ intpt = testtopol;
+ prevnode = hist[im];
+ if( prevnode == -1 )
+ {
+ *intpt++ = im;
+ }
+ else
+ {
+ for( intpt2=topol[prevnode][0]; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ }
+
+ prevnode = hist[jm];
+ if( prevnode == -1 )
+ {
+ *intpt++ = jm;
+ }
+ else
+ {
+ for( intpt2=topol[prevnode][0]; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=topol[prevnode][1]; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ }
+ *intpt = -1;
+// reporterr( "testtopol = \n" );
+// for( i=0; testtopol[i]>-1; i++ ) reporterr( " %03d", testtopol[i]+1 );
+// reporterr( "\n" );
+#endif
+ for( i=0; i<ngroup; i++ )
+ {
+// reporterr( "groups[%d] = \n", i );
+// for( j=0; groups[i][j]>-1; j++ ) reporterr( " %03d", groups[i][j]+1 );
+// reporterr( "\n" );
+ if( overlapmember( testtopol, groups[i] ) )
+ {
+ if( !includemember( testtopol, groups[i] ) && !includemember( groups[i], testtopol ) )
+ {
+ if( !warned[i] )
+ {
+ warned[i] = 1;
+ reporterr( "\n###################################################################\n" );
+ reporterr( "# WARNING: Group %d is forced to be a monophyletic cluster.\n", i+1 );
+ reporterr( "###################################################################\n" );
+ }
+ inconsistent[im][jm] = 1;
+
+ inconsistentpairlist = realloc( inconsistentpairlist, (ninconsistentpairs+1)*sizeof( int * ) );
+ inconsistentpairlist[ninconsistentpairs] = malloc( sizeof( int ) * 2 );
+ inconsistentpairlist[ninconsistentpairs][0] = im;
+ inconsistentpairlist[ninconsistentpairs][1] = jm;
+ ninconsistentpairs++;
+ break;
+ }
+ }
+ }
+ if( i == ngroup )
+ {
+// reporterr( "OK\n" );
+ break;
+ }
+ }
- tmplen[im] = (double)minscore / 2.0;
- for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
- for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
- for( i=0; i<nseq; i++ )
+
+
+
+ prevnode = hist[im];
+ nmemim = nmemar[im];
+// intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
+ intpt = topol[k][0];
+ if( prevnode == -1 )
+ {
+ *intpt++ = im;
+ *intpt = -1;
+ }
+ else
+ {
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
+ }
+
+ prevnode = hist[jm];
+ nmemjm = nmemar[jm];
+// intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
+ intpt = topol[k][1];
+ if( prevnode == -1 )
+ {
+ *intpt++ = jm;
+ *intpt = -1;
+ }
+ else
+ {
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
+ }
+
+ minscore *= 0.5;
+
+ len[k][0] = ( minscore - tmptmplen[im] );
+ len[k][1] = ( minscore - tmptmplen[jm] );
+ if( len[k][0] < 0.0 ) len[k][0] = 0.0;
+ if( len[k][1] < 0.0 ) len[k][1] = 0.0;
+
+
+ tmptmplen[im] = minscore;
+
+ hist[im] = k;
+ nmemar[im] = nmemim + nmemjm;
+
+ mindisfrom[im] = 999.9;
+ eff[im][jm] = 999.9;
+// eff[im][jm-im] = 999.9; // bug??
+
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
+ i = acpti->pos;
if( i != im && i != jm )
{
-#if 1
if( i < im )
{
miniim = i;
minijm = jm;
maxijm = i;
}
-#else
- miniim = MIN( i, im );
- maxiim = MAX( i, im );
- minijm = MIN( i, jm );
- maxijm = MAX( i, jm );
-#endif
-#if 1
eff0 = eff[miniim][maxiim];
eff1 = eff[minijm][maxijm];
- eff[miniim][maxiim] =
- MIN( eff0, eff1 ) * ( 1.0 - SUEFF ) +
- ( eff0 + eff1 ) * 0.5 * SUEFF;
+#if 0
+ tmpdouble = eff[miniim][maxiim] =
+ MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
#else
- MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - SUEFF ) +
- ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * SUEFF;
+ tmpdouble = eff[miniim][maxiim] =
+ (clusterfuncpt[0])( eff0, eff1 );
+#endif
+
+#if 1
+ if( tmpdouble < mindisfrom[i] )
+ {
+ mindisfrom[i] = tmpdouble;
+ nearest[i] = im;
+ }
+ if( tmpdouble < mindisfrom[im] )
+ {
+ mindisfrom[im] = tmpdouble;
+ nearest[im] = i;
+ }
+ if( nearest[i] == jm )
+ {
+ nearest[i] = im;
+ }
#endif
- eff[minijm][maxijm] = 9999.0;
- eff[im][jm] = 9999.0;
}
}
-#if DEBUG
- printf( "STEP-%03d:\n", k+1 );
- printf( "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
- printf( "\n" );
- printf( "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
- printf( "\n" );
+#if 0
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ strcpy( tree[im], treetmp );
+#else
+ treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 would be enough for (:%7,:%7), but %7 may be negative
+ if( !treetmp )
+ {
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
+ strcpy( tree[im], treetmp );
+#endif
+
+ acjmprev = ac[jm].prev;
+ acjmnext = ac[jm].next;
+ acjmprev->next = acjmnext;
+ if( acjmnext != NULL )
+ acjmnext->prev = acjmprev;
+// free( (void *)eff[jm] ); eff[jm] = NULL;
+
+#if 1 // muscle seems to miss this.
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+ if( nearest[i] == im )
+ {
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ }
+ if( eff[miniim][maxiim] > mindisfrom[i] )
+ setnearest_double_fullmtx( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ }
+ }
+#endif
+
+
+#if 0
+ fprintf( stdout, "\ncSTEP-%03d:\n", k+1 );
+ fprintf( stdout, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
+ fprintf( stdout, "\n" );
+ fprintf( stdout, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
+ fprintf( stdout, "\n" );
#endif
}
+ fp = fopen( "infile.tree", "w" );
+ fprintf( fp, "%s\n", treetmp );
+ fclose( fp );
+#if 0
+ FreeCharMtx( tree );
+#else
+ free( tree[0] );
+ free( tree );
+#endif
+ free( treetmp );
+ free( nametmp );
+ free( (void *)tmptmplen ); tmptmplen = NULL;
+ free( hist ); hist = NULL;
+ free( (char *)ac ); ac = NULL;
+ free( (void *)nmemar ); nmemar = NULL;
+ free( mindisfrom );
+ free( nearest );
+ free( testtopol );
+ FreeIntMtx( inconsistent );
+ FreeIntMtx( inconsistentpairlist );
+ free( warned );
}
-void spg( int nseq, double **oeff, int ***topol, double **len )
+void fixed_musclesupg_double_realloc_nobk_halfmtx_memsave( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree )
{
- int i, j, k;
- double tmplen[M];
-#if 0
- double eff[nseq][nseq];
- char pair[njob][njob];
-#else
- double **eff = NULL;
- char **pair = NULL;
- if( !eff )
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt;
+ double tmpdouble;
+ double eff1, eff0;
+ double *tmptmplen = NULL; // static TLS -> local, 2012/02/25
+ int *hist = NULL; // static TLS -> local, 2012/02/25
+ Bchain *ac = NULL; // static TLS -> local, 2012/02/25
+ int im = -1, jm = -1;
+ Bchain *acjmnext, *acjmprev;
+ int prevnode;
+ Bchain *acpti;
+ int *pt1, *pt2, *pt11;
+ int *nmemar; // static TLS -> local, 2012/02/25
+ int nmemim, nmemjm;
+ double minscore;
+ int *nearest = NULL; // by Mathog, a guess
+ double *mindisfrom = NULL; // by Mathog, a guess
+ double (*clusterfuncpt[1])(double,double);
+
+
+ sueff1 = 1 - (double)sueff_global;
+ sueff05 = (double)sueff_global * 0.5;
+ if ( treemethod == 'X' )
+ clusterfuncpt[0] = cluster_mix_double;
+ else if ( treemethod == 'E' )
+ clusterfuncpt[0] = cluster_average_double;
+ else if ( treemethod == 'q' )
+ clusterfuncpt[0] = cluster_minimum_double;
+ else
{
- eff = AllocateDoubleMtx( njob, njob );
- pair = AllocateCharMtx( njob, njob );
+ reporterr( "Unknown treemethod, %c\n", treemethod );
+ exit( 1 );
}
-#endif
-
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
- for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
- for( i=0; i<nseq; i++ ) pair[i][i] = 1;
- for( k=0; k<nseq-1; k++ )
- {
- float minscore = 9999.0;
- int im = -1, jm = -1;
- int count;
-
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
- {
- if( eff[i][j] < minscore )
- {
- minscore = eff[i][j];
- im = i; jm = j;
- }
- }
- for( i=0, count=0; i<nseq; i++ )
- if( pair[im][i] > 0 )
- {
- topol[k][0][count] = i;
- count++;
- }
- topol[k][0][count] = -1;
- for( i=0, count=0; i<nseq; i++ )
- if( pair[jm][i] > 0 )
- {
- topol[k][1][count] = i;
- count++;
- }
- topol[k][1][count] = -1;
-
- len[k][0] = minscore / 2.0 - tmplen[im];
- len[k][1] = minscore / 2.0 - tmplen[jm];
-
- tmplen[im] = minscore / 2.0;
-
- for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
- for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
-
- for( i=0; i<nseq; i++ )
- {
- if( i != im && i != jm )
- {
- eff[MIN(i,im)][MAX(i,im)] =
- MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
- eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
- }
- eff[im][jm] = 9999.0;
- }
-#if DEBUG
- printf( "STEP-%03d:\n", k+1 );
- printf( "len0 = %f\n", len[k][0] );
- for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
- printf( "\n" );
- printf( "len1 = %f\n", len[k][1] );
- for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
- printf( "\n" );
-#endif
- }
-}
-
-double ipower( double x, int n ) /* n > 0 */
-{
- double r;
-
- r = 1;
- while( n != 0 )
- {
- if( n & 1 ) r *= x;
- x *= x; n >>= 1;
- }
- return( r );
-}
-
-void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
-{
- int i, j, k, s1, s2;
- static double rootnode[M];
-
- if( nseq-2 < 0 )
+ if( !hist )
{
- fprintf( stderr, "Too few sequence for countnode: nseq = %d\n", nseq );
- exit( 1 );
- }
+ hist = AllocateIntVec( njob );
+ tmptmplen = AllocateFloatVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateFloatVec( njob );
+ nearest = AllocateIntVec( njob );
+ }
- for( i=0; i<nseq; i++ ) rootnode[i] = 0;
- for( i=0; i<nseq-2; i++ )
- {
- for( j=0; topol[i][0][j]>-1; j++ )
- rootnode[topol[i][0][j]]++;
- for( j=0; topol[i][1][j]>-1; j++ )
- rootnode[topol[i][1][j]]++;
- for( j=0; topol[i][0][j]>-1; j++ )
- {
- s1 = topol[i][0][j];
- for( k=0; topol[i][1][k]>-1; k++ )
- {
- s2 = topol[i][1][k];
- node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
- }
- }
- }
- for( j=0; topol[nseq-2][0][j]>-1; j++ )
- {
- s1 = topol[nseq-2][0][j];
- for( k=0; topol[nseq-2][1][k]>-1; k++ )
- {
- s2 = topol[nseq-2][1][k];
- node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
- }
- }
-}
+
+ for( i=0; i<nseq; i++ )
+ {
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
+ }
+ ac[nseq-1].next = NULL;
-void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */
-{
- int i, j, k, s1, s2;
- int rootnode[M];
+ for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
- for( i=0; i<nseq; i++ ) rootnode[i] = 0;
- for( i=0; i<nseq-2; i++ )
- {
- for( j=0; topol[i][0][j]>-1; j++ )
- rootnode[topol[i][0][j]]++;
- for( j=0; topol[i][1][j]>-1; j++ )
- rootnode[topol[i][1][j]]++;
- for( j=0; topol[i][0][j]>-1; j++ )
- {
- s1 = topol[i][0][j];
- for( k=0; topol[i][1][k]>-1; k++ )
- {
- s2 = topol[i][1][k];
- node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
- }
- }
- }
- for( j=0; topol[nseq-2][0][j]>-1; j++ )
- {
- s1 = topol[nseq-2][0][j];
- for( k=0; topol[nseq-2][1][k]>-1; k++ )
- {
- s2 = topol[nseq-2][1][k];
- node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
- }
- }
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
- node[j][i] = node[i][j];
-#if DEBUG
- fprintf( stderr, "node[][] in countnode_int" );
+ for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
for( i=0; i<nseq; i++ )
{
- for( j=0; j<nseq; j++ )
- {
- fprintf( stderr, "%#3d", node[i][j] );
- }
- fprintf( stderr, "\n" );
+ hist[i] = -1;
+ nmemar[i] = 1;
}
-#endif
-}
-void counteff_simple_float( int nseq, int ***topol, float **len, double *node )
-{
- int i, j, s1, s2;
- double total;
- static double rootnode[M];
- static double eff[M];
-
-#if DEBUG
- for( i=0; i<nseq; i++ ){
- fprintf( stderr, "len0 = %f\n", len[i][0] );
- fprintf( stderr, "len1 = %f\n", len[i][1] );
- }
-#endif
- for( i=0; i<nseq; i++ )
+ if( progressout ) reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ )
{
- rootnode[i] = 0.0;
- eff[i] = 1.0;
-/*
- rootnode[i] = 1.0;
-*/
- }
- for( i=0; i<nseq-1; i++ )
- {
- for( j=0; (s1=topol[i][0][j]) > -1; j++ )
+ if( progressout && k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
+
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
- rootnode[s1] += (double)len[i][0] * eff[s1];
- eff[s1] *= 0.5;
-/*
- rootnode[s1] *= 0.5;
-*/
-
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ if( mindisfrom[i] < minscore ) // muscle
+ {
+ im = i;
+ minscore = mindisfrom[i];
+ }
}
- for( j=0; (s2=topol[i][1][j]) > -1; j++ )
+ jm = nearest[im];
+ if( jm < im )
{
- rootnode[s2] += (double)len[i][1] * eff[s2];
- eff[s2] *= 0.5;
-/*
- rootnode[s2] *= 0.5;
-*/
-
+ j=jm; jm=im; im=j;
}
- }
- for( i=0; i<nseq; i++ )
- {
-#if 1 /* 97.9.29 */
- rootnode[i] += GETA3;
-#endif
-#if 0
- fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
-#endif
- }
+
+
+ prevnode = hist[im];
+ if( dep ) dep[k].child0 = prevnode;
+ nmemim = nmemar[im];
+ intpt = topol[k][0] = (int *)realloc( topol[k][0], ( 2 ) * sizeof( int ) ); // memsave
+// intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) );
+ if( prevnode == -1 )
+ {
+ *intpt++ = im;
+ *intpt = -1;
+ }
+ else
+ {
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+// pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+// pt22 = pt2;
+ }
#if 1
- total = 0.0;
- for( i=0; i<nseq; i++ )
- {
- total += rootnode[i];
- }
+ *intpt++ = *pt11;
+ *intpt = -1;
#else
- total = 1.0;
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
#endif
-
- for( i=0; i<nseq; i++ )
- {
- node[i] = rootnode[i] / total;
- }
+ }
-#if 0
- fprintf( stderr, "weight array in counteff_simple\n" );
- for( i=0; i<nseq; i++ )
- fprintf( stderr, "%f\n", node[i] );
- printf( "\n" );
- exit( 1 );
+ prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
+ nmemjm = nmemar[jm];
+ intpt = topol[k][1] = (int *)realloc( topol[k][1], ( 2 ) * sizeof( int ) );
+// intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
+ if( !intpt )
+ {
+ reporterr( "Cannot reallocate topol\n" );
+ exit( 1 );
+ }
+ if( prevnode == -1 )
+ {
+ *intpt++ = jm;
+ *intpt = -1;
+ }
+ else
+ {
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+// pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+// pt22 = pt2;
+ }
+#if 1
+ *intpt++ = *pt11;
+ *intpt = -1;
+#else
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
#endif
-}
+ }
-void counteff_simple( int nseq, int ***topol, double **len, double *node )
-{
- int i, j, s1, s2;
- double total;
- static double rootnode[M];
- static double eff[M];
+ minscore *= 0.5;
-#if DEBUG
- for( i=0; i<nseq; i++ ){
- fprintf( stderr, "len0 = %f\n", len[i][0] );
- fprintf( stderr, "len1 = %f\n", len[i][1] );
- }
+ len[k][0] = ( minscore - tmptmplen[im] );
+ len[k][1] = ( minscore - tmptmplen[jm] );
+
+ if( dep ) dep[k].distfromtip = minscore;
+
+ tmptmplen[im] = minscore;
+
+ hist[im] = k;
+ nmemar[im] = nmemim + nmemjm;
+
+ mindisfrom[im] = 999.9;
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+ if( i != im && i != jm )
+ {
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
+ }
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
+ }
+ eff0 = eff[miniim][maxiim-miniim];
+ eff1 = eff[minijm][maxijm-minijm];
+ tmpdouble = eff[miniim][maxiim-miniim] =
+#if 0
+ MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
+#else
+ (clusterfuncpt[0])( eff0, eff1 );
#endif
- for( i=0; i<nseq; i++ )
- {
- rootnode[i] = 0.0;
- eff[i] = 1.0;
-/*
- rootnode[i] = 1.0;
-*/
- }
- for( i=0; i<nseq-1; i++ )
- {
- for( j=0; (s1=topol[i][0][j]) > -1; j++ )
+ if( tmpdouble < mindisfrom[i] )
+ {
+ mindisfrom[i] = tmpdouble;
+ nearest[i] = im;
+ }
+ if( tmpdouble < mindisfrom[im] )
+ {
+ mindisfrom[im] = tmpdouble;
+ nearest[im] = i;
+ }
+ if( nearest[i] == jm )
+ {
+ nearest[i] = im;
+ }
+ }
+ }
+
+// reporterr( "im,jm=%d,%d\n", im, jm );
+ acjmprev = ac[jm].prev;
+ acjmnext = ac[jm].next;
+ acjmprev->next = acjmnext;
+ if( acjmnext != NULL )
+ acjmnext->prev = acjmprev;
+ if( efffree )
{
- rootnode[s1] += len[i][0] * eff[s1];
- eff[s1] *= 0.5;
-/*
- rootnode[s1] *= 0.5;
-*/
-
+ free( (void *)eff[jm] ); eff[jm] = NULL;
}
- for( j=0; (s2=topol[i][1][j]) > -1; j++ )
+
+#if 1 // muscle seems to miss this.
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
- rootnode[s2] += len[i][1] * eff[s2];
- eff[s2] *= 0.5;
-/*
- rootnode[s2] *= 0.5;
-*/
-
+ i = acpti->pos;
+ if( nearest[i] == im )
+ {
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ }
+ if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
+ setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
+ }
}
- }
- for( i=0; i<nseq; i++ )
- {
-#if 1 /* 97.9.29 */
- rootnode[i] += GETA3;
#endif
+
+
#if 0
- fprintf( stderr, "### rootnode for %d = %f\n", i, rootnode[i] );
+ fprintf( stdout, "vSTEP-%03d:\n", k+1 );
+ fprintf( stdout, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
+ fprintf( stdout, "\n" );
+ fprintf( stdout, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
+ fprintf( stdout, "\n" );
#endif
- }
-#if 1
- total = 0.0;
- for( i=0; i<nseq; i++ )
+ }
+ free( (void *)tmptmplen ); tmptmplen = NULL;
+ free( hist ); hist = NULL;
+ free( (char *)ac ); ac = NULL;
+ free( (void *)nmemar ); nmemar = NULL;
+ free( mindisfrom );
+ free( nearest );
+}
+void fixed_musclesupg_double_realloc_nobk_halfmtx( int nseq, double **eff, int ***topol, double **len, Treedep *dep, int progressout, int efffree )
+{
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt, *intpt2;
+ double tmpdouble;
+ double eff1, eff0;
+ double *tmptmplen = NULL; // static TLS -> local, 2012/02/25
+ int *hist = NULL; // static TLS -> local, 2012/02/25
+ Bchain *ac = NULL; // static TLS -> local, 2012/02/25
+ int im = -1, jm = -1;
+ Bchain *acjmnext, *acjmprev;
+ int prevnode;
+ Bchain *acpti;
+ int *pt1, *pt2, *pt11, *pt22;
+ int *nmemar; // static TLS -> local, 2012/02/25
+ int nmemim, nmemjm;
+ double minscore;
+ int *nearest = NULL; // by Mathog, a guess
+ double *mindisfrom = NULL; // by Mathog, a guess
+ double (*clusterfuncpt[1])(double,double);
+
+
+ sueff1 = 1 - (double)sueff_global;
+ sueff05 = (double)sueff_global * 0.5;
+ if ( treemethod == 'X' )
+ clusterfuncpt[0] = cluster_mix_double;
+ else if ( treemethod == 'E' )
+ clusterfuncpt[0] = cluster_average_double;
+ else if ( treemethod == 'q' )
+ clusterfuncpt[0] = cluster_minimum_double;
+ else
{
- total += rootnode[i];
+ reporterr( "Unknown treemethod, %c\n", treemethod );
+ exit( 1 );
}
-#else
- total = 1.0;
-#endif
-
- for( i=0; i<nseq; i++ )
+
+ if( !hist )
{
- node[i] = rootnode[i] / total;
+ hist = AllocateIntVec( njob );
+ tmptmplen = AllocateFloatVec( njob );
+ ac = (Bchain *)malloc( njob * sizeof( Bchain ) );
+ nmemar = AllocateIntVec( njob );
+ mindisfrom = AllocateFloatVec( njob );
+ nearest = AllocateIntVec( njob );
}
-#if 0
- fprintf( stderr, "weight array in counteff_simple\n" );
+
for( i=0; i<nseq; i++ )
- fprintf( stderr, "%f\n", node[i] );
- printf( "\n" );
- exit( 1 );
-#endif
-}
-
+ {
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1;
+ ac[i].pos = i;
+ }
+ ac[nseq-1].next = NULL;
-void counteff( int nseq, int ***topol, double **len, double **node )
-{
- int i, j, k, s1, s2;
- double rootnode[M];
- double eff[M];
+ for( i=0; i<nseq; i++ ) setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i ); // muscle
- if( mix )
+ for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ )
{
- switch( weight )
- {
- case( 2 ):
- weight = 3;
- break;
- case( 3 ):
- weight = 2;
- break;
- default:
- ErrorExit( "mix error" );
- break;
- }
+ hist[i] = -1;
+ nmemar[i] = 1;
}
- if( weight == 2 )
+ if( progressout ) reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ )
{
- for( i=0; i<nseq; i++ ) rootnode[i] = 0;
- for( i=0; i<nseq-2; i++ )
- {
- for( j=0; topol[i][0][j]>-1; j++ )
- rootnode[topol[i][0][j]]++;
- for( j=0; topol[i][1][j]>-1; j++ )
- rootnode[topol[i][1][j]]++;
- for( j=0; topol[i][0][j]>-1; j++ )
- {
- s1 = topol[i][0][j];
- for( k=0; topol[i][1][k]>-1; k++ )
- {
- s2 = topol[i][1][k];
- node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
- }
- }
- }
- for( j=0; topol[nseq-2][0][j]>-1; j++ )
- {
- s1 = topol[nseq-2][0][j];
- for( k=0; topol[nseq-2][1][k]>-1; k++ )
- {
- s2 = topol[nseq-2][1][k];
- node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
- }
- }
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
- node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
- node[j][i] = node[i][j];
- }
+ if( progressout && k % 10 == 0 ) reporterr( "\r% 5d / %d", k, nseq );
- if( weight == 3 )
- {
-#if DEBUG
- for( i=0; i<nseq; i++ ){
- fprintf( stderr, "len0 = %f\n", len[i][0] );
- fprintf( stderr, "len1 = %f\n", len[i][1] );
- }
-#endif
- for( i=0; i<nseq; i++ )
+ minscore = 999.9;
+ for( acpti=ac; acpti->next!=NULL; acpti=acpti->next )
{
- rootnode[i] = 0.0;
- eff[i] = 1.0;
-/*
- rootnode[i] = 1.0;
-*/
- }
- for( i=0; i<nseq-1; i++ )
- {
- for( j=0; (s1=topol[i][0][j]) > -1; j++ )
- {
- rootnode[s1] += len[i][0] * eff[s1];
- eff[s1] *= 0.5;
-/*
- rootnode[s1] *= 0.5;
-*/
-
- }
- for( j=0; (s2=topol[i][1][j]) > -1; j++ )
+ i = acpti->pos;
+// reporterr( "k=%d i=%d\n", k, i );
+ if( mindisfrom[i] < minscore ) // muscle
{
- rootnode[s2] += len[i][1] * eff[s2];
- eff[s2] *= 0.5;
-/*
- rootnode[s2] *= 0.5;
-*/
-
+ im = i;
+ minscore = mindisfrom[i];
}
}
- for( i=0; i<nseq; i++ )
+ jm = nearest[im];
+ if( jm < im )
{
-#if 1 /* 97.9.29 */
- rootnode[i] += GETA3;
-#endif
-#if DEBUG
- fprintf( stderr, "rootnode for %d = %f\n", i, rootnode[i] );
-#endif
+ j=jm; jm=im; im=j;
}
- for( i=0; i<nseq; i++ )
+
+		prevnode = hist[im];
+		if( dep ) dep[k].child0 = prevnode;
+		nmemim = nmemar[im];
+		intpt = topol[k][0] = (int *)realloc( topol[k][0], ( nmemim + 1 ) * sizeof( int ) ); // room for nmemim members plus the -1 terminator
+		if( !intpt ) { reporterr( "Cannot reallocate topol\n" ); exit( 1 ); } // realloc can fail; same handling as the topol[k][1] branch further down
+ if( prevnode == -1 )
{
- for( j=0; j<nseq; j++ )
- if( j != i )
- node[i][j] = (double)rootnode[i] * rootnode[j];
- else node[i][i] = rootnode[i];
+ *intpt++ = im;
+ *intpt = -1;
}
- }
-
-#if 0
- printf( "weight matrix in counteff\n" );
- for( i=0; i<nseq; i++ )
- {
- for( j=0; j<nseq; j++ )
+ else
{
- printf( "%f ", node[i][j] );
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
- printf( "\n" );
- }
-#endif
-}
-
-float score_calcp( char *seq1, char *seq2, int len )
-{
- int k;
- int ms1, ms2;
- float tmpscore;
- int len2 = len - 2;
- tmpscore = 0.0;
- for( k=0; k<len; k++ )
- {
- ms1 = (int)seq1[k];
- ms2 = (int)seq2[k];
- if( ms1 == (int)'-' && ms2 == (int)'-' ) continue;
- tmpscore += (float)amino_dis[ms1][ms2];
-
- if( ms1 == (int)'-' )
+ prevnode = hist[jm];
+ if( dep ) dep[k].child1 = prevnode;
+ nmemjm = nmemar[jm];
+ intpt = topol[k][1] = (int *)realloc( topol[k][1], ( nmemjm + 1 ) * sizeof( int ) );
+ if( !intpt )
{
- tmpscore += (float)penalty;
- tmpscore += (float)amino_dis[ms1][ms2];
- while( (ms1=(int)seq1[++k]) == (int)'-' )
- tmpscore += (float)amino_dis[ms1][ms2];
- k--;
- if( k >len2 ) break;
- continue;
+ reporterr( "Cannot reallocate topol\n" );
+ exit( 1 );
}
- if( ms2 == (int)'-' )
+ if( prevnode == -1 )
{
- tmpscore += (float)penalty;
- tmpscore += (float)amino_dis[ms1][ms2];
- while( (ms2=(int)seq2[++k]) == (int)'-' )
- tmpscore += (float)amino_dis[ms1][ms2];
- k--;
- if( k > len2 ) break;
- continue;
+ *intpt++ = jm;
+ *intpt = -1;
}
- }
- return( tmpscore );
-}
-
-float score_calc1( char *seq1, char *seq2 ) /* method 1 */
-{
- int k;
- float score = 0.0;
- int count = 0;
- int len = strlen( seq1 );
-
- for( k=0; k<len; k++ )
- {
- if( seq1[k] != '-' && seq2[k] != '-' )
+ else
{
- score += (float)amino_dis[(int)seq1[k]][(int)seq2[k]];
- count++;
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
- }
- if( count ) score /= (float)count;
- else score = 1.0;
- return( score );
-}
-float substitution_nid( char *seq1, char *seq2 )
-{
- int k;
- float s12;
- int len = strlen( seq1 );
-
- s12 = 0.0;
- for( k=0; k<len; k++ )
- if( seq1[k] != '-' && seq2[k] != '-' )
- s12 += ( seq1[k] == seq2[k] );
+ minscore *= 0.5;
-// fprintf( stdout, "s12 = %f\n", s12 );
- return( s12 );
-}
+ len[k][0] = ( minscore - tmptmplen[im] );
+ len[k][1] = ( minscore - tmptmplen[jm] );
-float substitution_score( char *seq1, char *seq2 )
-{
- int k;
- float s12;
- int len = strlen( seq1 );
-
- s12 = 0.0;
- for( k=0; k<len; k++ )
- if( seq1[k] != '-' && seq2[k] != '-' )
- s12 += amino_dis[(int)seq1[k]][(int)seq2[k]];
+ if( dep ) dep[k].distfromtip = minscore;
-// fprintf( stdout, "s12 = %f\n", s12 );
- return( s12 );
-}
+ tmptmplen[im] = minscore;
-float substitution_hosei( char *seq1, char *seq2 ) /* method 1 */
-#if 0
-{
- int k;
- float score = 0.0;
- int count = 0;
- int len = strlen( seq1 );
+ hist[im] = k;
+ nmemar[im] = nmemim + nmemjm;
- for( k=0; k<len; k++ )
- {
- if( seq1[k] != '-' && seq2[k] != '-' )
- {
- score += (float)( seq1[k] != seq2[k] );
- count++;
- }
- }
- if( count ) score /= (float)count;
- else score = 1.0;
- if( score < 0.95 ) score = - log( 1.0 - score );
- else score = 3.0;
- return( score );
-}
+ mindisfrom[im] = 999.9;
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
+ {
+ i = acpti->pos;
+ if( i != im && i != jm )
+ {
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
+ }
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
+ }
+ eff0 = eff[miniim][maxiim-miniim];
+ eff1 = eff[minijm][maxijm-minijm];
+ tmpdouble = eff[miniim][maxiim-miniim] =
+#if 0
+ MIN( eff0, eff1 ) * sueff1 + ( eff0 + eff1 ) * sueff05;
#else
-{
- int count = 0;
- float score;
- int iscore = 0;
- char s1, s2;
-
- while( (s1=*seq1++) )
- {
- s2 = *seq2++;
- if( s1 == '-' ) continue;
- if( s2 == '-' ) continue;
- iscore += ( s1 != s2 );
- count++;
- }
- if( count ) score = (float)iscore / count;
- else score = 1.0;
- if( score < 0.95 ) score = - log( 1.0 - score );
- else score = 3.0;
- return( score );
-}
+ (clusterfuncpt[0])( eff0, eff1 );
#endif
+ if( tmpdouble < mindisfrom[i] )
+ {
+ mindisfrom[i] = tmpdouble;
+ nearest[i] = im;
+ }
+ if( tmpdouble < mindisfrom[im] )
+ {
+ mindisfrom[im] = tmpdouble;
+ nearest[im] = i;
+ }
+ if( nearest[i] == jm )
+ {
+ nearest[i] = im;
+ }
+ }
+ }
-float substitution( char *seq1, char *seq2 ) /* method 1 */
-{
- int k;
- float score = 0.0;
- int count = 0;
- int len = strlen( seq1 );
-
- for( k=0; k<len; k++ )
- {
- if( seq1[k] != '-' && seq2[k] != '-' )
+// reporterr( "im,jm=%d,%d\n", im, jm );
+ acjmprev = ac[jm].prev;
+ acjmnext = ac[jm].next;
+ acjmprev->next = acjmnext;
+ if( acjmnext != NULL )
+ acjmnext->prev = acjmprev;
+ if( efffree )
{
- score += (float)( seq1[k] != seq2[k] );
- count++;
+ free( (void *)eff[jm] ); eff[jm] = NULL;
}
- }
- if( count ) score /= (float)count;
- else score = 1.0;
- return( score );
-}
-
-void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
-{
- int i, j;
-
- if( weight > 1 )
- {
- if( utree == 0 )
+#if 1 // muscle seems to miss this.
+ for( acpti=ac; acpti!=NULL; acpti=acpti->next )
{
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
- {
-/*
- eff[i][j] = (double)score_calc1( seq[i], seq[j] );
-*/
- eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
- /*
- fprintf( stderr, "%f\n", eff[i][j] );
- */
- }
-/*
- fprintf( stderr, "distance matrix\n" );
- for( i=0; i<nseq; i++ )
+ i = acpti->pos;
+ if( nearest[i] == im )
{
- for( j=0; j<nseq; j++ )
+ if( i < im )
{
- fprintf( stderr, "%f ", eff[i][j] );
+ miniim = i;
+ maxiim = im;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
}
- fprintf( stderr, "\n" );
+ if( eff[miniim][maxiim-miniim] > mindisfrom[i] )
+ setnearest( nseq, ac, eff, mindisfrom+i, nearest+i, i );
}
-*/
-/*
- upg( nseq, eff, topol, len );
- upg2( nseq, eff, topol, len );
-*/
- spg( nseq, eff, topol, len );
- counteff( nseq, topol, len, eff );
}
- }
- else
- {
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
- eff[i][j] = 1.0;
- }
-/*
-fprintf( stderr, "weight matrix\n" );
-for( i=0; i<nseq; i++ )
-{
- for( j=0; j<nseq; j++ )
- {
- fprintf( stderr, "%f ", eff[i][j] );
- }
- fprintf( stderr, "\n" );
-}
-*/
+#endif
+
+
+#if 0
+ fprintf( stdout, "vSTEP-%03d:\n", k+1 );
+ fprintf( stdout, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i]+1 );
+ fprintf( stdout, "\n" );
+ fprintf( stdout, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i]+1 );
+ fprintf( stdout, "\n" );
+#endif
+ }
+ free( (void *)tmptmplen ); tmptmplen = NULL;
+ free( hist ); hist = NULL;
+ free( (char *)ac ); ac = NULL;
+ free( (void *)nmemar ); nmemar = NULL;
+ free( mindisfrom );
+ free( nearest );
}
-float bscore_calc( char **seq, int s, double **eff ) /* algorithm B */
-{
- int i, j, k;
- int gb1, gb2, gc1, gc2;
- int cob;
- int nglen;
- int len = strlen( seq[0] );
- long score;
- score = 0;
- nglen = 0;
- for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
- {
- double efficient = eff[i][j];
- gc1 = 0;
- gc2 = 0;
- for( k=0; k<len; k++ )
- {
- gb1 = gc1;
- gb2 = gc2;
- gc1 = ( seq[i][k] == '-' );
- gc2 = ( seq[j][k] == '-' );
-
- cob =
- !gb1 * gc1
- * !gb2 * !gc2
- + !gb1 * !gc1
- * !gb2 * gc2
- + !gb1 * gc1
- * gb2 * !gc2
- + gb1 * !gc1
- * !gb2 * gc2
-
- + gb1 * !gc1
- * gb2 * gc2 *BEFF
- + gb1 * gc1
- * gb2 * !gc2 *BEFF
- ;
- score += (long)cob * penalty * efficient;
- score += (long)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * efficient;
- nglen += ( !gc1 * !gc2 );
- }
- }
- return( (float)score / nglen + 400.0 * !scoremtx );
-}
-void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax )
+void veryfastsupg_double_loadtree( int nseq, double **eff, int ***topol, double **len, char **name )
{
- *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 );
- *mseq1pt = AllocateCharVec( locnlenmax+1 );
-}
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt, *intpt2;
+ double eff1, eff0;
+ int *hist = NULL;
+ Achain *ac = NULL;
+ double minscore;
+ char **tree;
+ char *treetmp;
+ int im = -1, jm = -1;
+ int prevnode, acjmnext, acjmprev;
+ int *pt1, *pt2, *pt11, *pt22;
+ FILE *fp;
+ int node[2];
+ double lenfl[2];
+ char *nametmp, *nameptr, *tmpptr; //static?
+ char namec;
-void FreeTmpSeqs( char **mseq2, char *mseq1 )
-{
- FreeCharMtx( mseq2 );
- free( (char *)mseq1 );
-}
+ fp = fopen( "_guidetree", "r" );
+ if( !fp )
+ {
+ reporterr( "cannot open _guidetree\n" );
+ exit( 1 );
+ }
-void gappick0( char *aseq, char *seq )
-{
- for( ; *seq != 0; seq++ )
+ if( !hist )
{
- if( *seq != '-' )
- *aseq++ = *seq;
+// treetmp = AllocateCharVec( njob*50 );
+ treetmp = NULL;
+// tree = AllocateCharMtx( njob, njob*50 );
+ tree = AllocateCharMtx( njob, 0 );
+ nametmp = AllocateCharVec( 1000 ); // nagasugi
+ hist = AllocateIntVec( njob );
+ ac = (Achain *)malloc( njob * sizeof( Achain ) );
}
- *aseq = 0;
-}
-
-void gappick( int nseq, int s, char **aseq, char **mseq2,
- double **eff, double *effarr )
-{
- int i, j, count, countjob, len, allgap;
- len = strlen( aseq[0] );
- for( i=0, count=0; i<len; i++ )
+ for( i=0; i<nseq; i++ )
{
- allgap = 1;
- for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' );
- if( allgap == 0 )
+ for( j=0; j<999; j++ ) nametmp[j] = 0;
+ for( j=0; j<999; j++ )
{
- for( j=0, countjob=0; j<nseq; j++ )
- {
- if( j != s )
- {
- mseq2[countjob][count] = aseq[j][i];
- countjob++;
- }
- }
- count++;
+			namec = name[i][j]; // copy the name, replacing every character outside the allowed set with '_'
+			if( namec == 0 )
+				break;
+			else if( isalnum( (unsigned char)namec ) || namec == '/' || namec == '=' || namec == '-' || namec == '{' || namec == '}' ) // cast: a negative char passed to isalnum() is undefined behaviour
+				nametmp[j] = namec;
+			else
+				nametmp[j] = '_';
}
- }
- for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0;
+ nametmp[j] = 0;
+// sprintf( tree[i], "%d_l=%d_%.20s", i+1, nlen[i], nametmp+1 );
+		nameptr = outnumber ? strstr( nametmp, "_numo_e" ) : NULL; // NULL when numbering is off or the marker is missing
+		if( nameptr ) nameptr += 8; // step over the 7-character marker and the one character that follows it
+		else
+			nameptr = nametmp + 1; // fallback (also used when the marker is absent): drop only the first character
- for( i=0, countjob=0; i<nseq; i++ )
- {
- if( i != s )
+		if( (tmpptr=strstr( nameptr, "_oe_" )) ) nameptr = tmpptr + 4; // skip past "_oe_"; original note: because of the '=' -> '_' substitution (NOTE(review): '=' is kept by the filter above -- confirm)
+
+		tree[i] = calloc( strlen( nametmp )+100, sizeof( char ) ); // +100 leaves room for the sequence number and separators written by sprintf below
+ if( tree[i] == NULL )
{
- effarr[countjob] = eff[s][i];
- countjob++;
+ reporterr( "Cannot allocate tree!\n" );
+ exit( 1 );
}
+ sprintf( tree[i], "\n%d_%.900s\n", i+1, nameptr );
}
-/*
-fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
-for( i=0; i<countjob; i++ )
- fprintf( stdout, " %f", effarr[i] );
-printf( "\n" );
-*/
-}
+
+ for( i=0; i<nseq; i++ )
+ {
+ ac[i].next = i+1;
+ ac[i].prev = i-1;
+// ac[i].curr = i;
+ }
+ ac[nseq-1].next = -1;
-void commongappick_record( int nseq, char **seq, int *map )
-{
- int i, j, count;
- int len = strlen( seq[0] );
+ for( i=0; i<nseq; i++ ) hist[i] = -1;
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ )
+ {
+ if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
- for( i=0, count=0; i<=len; i++ )
- {
- /*
- allgap = 1;
- for( j=0; j<nseq; j++ )
- allgap *= ( seq[j][i] == '-' );
- if( !allgap )
- */
- for( j=0; j<nseq; j++ )
- if( seq[j][i] != '-' ) break;
- if( j != nseq )
+#if 0
+ minscore = 99999.9;
+ for( i=0; ac[i].next!=-1; i=ac[i].next )
{
- for( j=0; j<nseq; j++ )
- {
- seq[j][count] = seq[j][i];
+ for( j=ac[i].next; j!=-1; j=ac[j].next )
+ {
+ tmpdouble = eff[i][j];
+ if( tmpdouble < minscore )
+ {
+ minscore = tmpdouble;
+ im = i; jm = j;
+ }
}
- map[count] = i;
- count++;
- }
- }
-}
-
-void commongappick( int nseq, char **seq )
-{
- int i, j, count;
- int len = strlen( seq[0] );
+ }
+#else
+		lenfl[0] = lenfl[1] = -1.0; // sentinel: stays -1.0 if the tree line carries no branch length
+		loadtreeoneline( node, lenfl, fp );
+		im = node[0];
+		jm = node[1];
+		minscore = ( im >= 0 && jm >= 0 && im < nseq && jm < nseq ) ? eff[im][jm] : 0.0; // never index eff[][] with unvalidated ids; bad ids are rejected just below
- for( i=0, count=0; i<=len; i++ )
- {
- /*
- allgap = 1;
- for( j=0; j<nseq; j++ )
- allgap *= ( seq[j][i] == '-' );
- if( !allgap )
- */
- for( j=0; j<nseq; j++ )
- if( seq[j][i] != '-' ) break;
- if( j != nseq )
+		if( im < 0 || jm < 0 || im == jm || im > nseq-1 || jm > nseq-1 || tree[im] == NULL || tree[jm] == NULL ) // also reject negative ids and self-joins (im == jm would free tree[im] twice)
{
- for( j=0; j<nseq; j++ )
- {
- seq[j][count] = seq[j][i];
- }
- count++;
- }
- }
-}
-
-double score_calc0( char **seq, int s, double **eff, int ex )
-{
- double tmp;
-
- if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
- if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex );
- else tmp = score_calc5( seq, s, eff, ex );
-
- return( tmp );
+ reporterr( "\n\nCheck the guide tree.\n" );
+ reporterr( "im=%d, jm=%d\n", im+1, jm+1 );
+ reporterr( "Please use newick2mafft.rb to generate a tree file from a newick tree.\n\n" );
+ exit( 1 );
+ }
-}
-/*
-float score_m_1( char **seq, int ex, double **eff )
-{
- int i, j, k;
- int len = strlen( seq[0] );
- int gb1, gb2, gc1, gc2;
- int cob;
- int nglen;
- double score;
+// reporterr( "im=%d, jm=%d, minscore = %f\n", im, jm, minscore );
- score = 0.0;
- nglen = 0;
- for( i=0; i<njob; i++ )
- {
- double efficient = eff[MIN(i,ex)][MAX(i,ex)];
- if( i == ex ) continue;
- gc1 = 0;
- gc2 = 0;
- for( k=0; k<len; k++ )
+ if( lenfl[0] == -1.0 || lenfl[1] == -1.0 )
{
- gb1 = gc1;
- gb2 = gc2;
-
- gc1 = ( seq[i][k] == '-' );
- gc2 = ( seq[ex][k] == '-' );
-
- cob =
- !gb1 * gc1
- * !gb2 * !gc2
-
- + !gb1 * !gc1
- * !gb2 * gc2
+ reporterr( "\n\nWARNING: Branch length is not given.\n" );
+ exit( 1 );
+ }
- + !gb1 * gc1
- * gb2 * !gc2
+ if( lenfl[0] < 0.0 ) lenfl[0] = 0.0;
+ if( lenfl[1] < 0.0 ) lenfl[1] = 0.0;
+#endif
- + gb1 * !gc1
- * !gb2 * gc2
-
- + gb1 * !gc1
- * gb2 * gc2 *BEFF
+// reporterr( "im=%d, jm=%d\n", im, jm );
- + gb1 * gc1
- * gb2 * !gc2 *BEFF
- ;
- score += (double)cob * penalty * efficient;
- score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
- *
- nglen += ( !gc1 * !gc2 );
- *
- if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
+ intpt = topol[k][0];
+ prevnode = hist[im];
+ if( prevnode == -1 )
+ {
+ *intpt++ = im;
+ *intpt = -1;
}
- }
- return( (float)score / nglen + 400.0 * !scoremtx );
-}
-*/
-
-#if 0
-void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
-{
- int i, j, k;
- int len = strlen( seq[0] );
- double tmp;
- double count;
- int ch;
- double sco[N];
-
- for( i=0; i<len; i++ )
- {
- tmp = 0.0; count = 0;
- for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
+ else
{
- /*
- if( seq[j][i] != '-' && seq[k][i] != '-' )
- */
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
{
- tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
- count++;
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
}
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
- if( count > 0.0 ) tmp /= count;
- else( tmp = 0.0 );
- ch = (int)( tmp/100.0 - 0.000001 );
- sprintf( sco1+i, "%c", ch+0x61 );
- }
- sco1[len] = 0;
-
- for( i=0; i<len; i++ )
- {
- tmp = 0.0; count = 0;
- for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
- {
- /*
- if( seq[j][i] != '-' && seq[k][i] != '-' )
- */
- {
- tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
- count += eff[j][k];
- }
- }
- if( count > 0.0 ) tmp /= count;
- else( tmp = 0.0 );
- tmp = ( tmp - 400 * !scoremtx ) * 2;
- if( tmp < 0 ) tmp = 0;
- ch = (int)( tmp/100.0 - 0.000001 );
- sprintf( sco2+i, "%c", ch+0x61 );
- sco[i] = tmp;
- }
- sco2[len] = 0;
- for( i=WIN; i<len-WIN; i++ )
- {
- tmp = 0.0;
- for( j=i-WIN; j<=i+WIN; j++ )
+ intpt = topol[k][1];
+ prevnode = hist[jm];
+ if( prevnode == -1 )
{
- tmp += sco[j];
+ *intpt++ = jm;
+ *intpt = -1;
}
- for( j=0; j<njob; j++ )
+ else
{
- if( seq[j][i] == '-' )
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
{
- tmp = 0.0;
- break;
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
}
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
- tmp /= WIN * 2 + 1;
- ch = (int)( tmp/100.0 - 0.0000001 );
- sprintf( sco3+i, "%c", ch+0x61 );
- }
- for( i=0; i<WIN; i++ ) sco3[i] = '-';
- for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
- sco3[len] = 0;
-}
-#endif
-void strins( char *str1, char *str2 )
-{
- char *bk;
- int len1 = strlen( str1 );
- int len2 = strlen( str2 );
+ minscore *= 0.5;
- bk = str2;
- str2 += len1+len2;
- str1 += len1-1;
+#if 0
+ len[k][0] = minscore - tmptmplen[im];
+ len[k][1] = minscore - tmptmplen[jm];
+#else
+ len[k][0] = lenfl[0];
+ len[k][1] = lenfl[1];
+#endif
- while( str2 >= bk+len1 ) { *str2 = *(str2-len1); str2--;} // by D.Mathog
- while( str2 >= bk ) { *str2-- = *str1--; }
-}
-int isaligned( int nseq, char **seq )
-{
- int i;
- int len = strlen( seq[0] );
- for( i=1; i<nseq; i++ )
- {
- if( strlen( seq[i] ) != len ) return( 0 );
- }
- return( 1 );
-}
-
-double score_calc_for_score( int nseq, char **seq )
-{
- int i, j, k, c;
- int len = strlen( seq[0] );
- double score;
- double tmpscore;
- char *mseq1, *mseq2;
+ hist[im] = k;
- score = 0.0;
- for( i=0; i<nseq-1; i++ )
- {
- for( j=i+1; j<nseq; j++ )
+ for( i=0; i!=-1; i=ac[i].next )
{
- mseq1 = seq[i];
- mseq2 = seq[j];
- tmpscore = 0.0;
- c = 0;
- for( k=0; k<len; k++ )
+ if( i != im && i != jm )
{
- if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
- c++;
- if( mseq1[k] == '-' )
- {
- tmpscore += penalty - n_dis[0][24];
- while( mseq1[++k] == '-' )
- ;
- k--;
- if( k > len-2 ) break;
- continue;
- }
- if( mseq2[k] == '-' )
- {
- tmpscore += penalty - n_dis[0][24];
- while( mseq2[++k] == '-' )
- ;
- k--;
- if( k > len-2 ) break;
- continue;
- }
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
+ }
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
+ }
+ eff0 = eff[miniim][maxiim];
+ eff1 = eff[minijm][maxijm];
+ eff[miniim][maxiim] =
+ MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
+ ( eff0 + eff1 ) * 0.5 * sueff_global;
}
- score += (double)tmpscore / (double)c;
-#if DEBUG
- printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
- printf( "tmpscore / c = %f\n", tmpscore/(double)c );
-#endif
}
- }
- fprintf( stderr, "raw score = %f\n", score );
- score /= (double)nseq * ( nseq-1.0 ) / 2.0;
- score += 400.0;
-#if DEBUG
- printf( "score in mltaln9.c = %f\n", score );
-#endif
- return( (double)score );
-}
-
-void floatncpy( float *vec1, float *vec2, int len )
-{
- while( len-- )
- *vec1++ = *vec2++;
-}
-
-float score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */
-{
- int i, j, k;
- int gb1, gb2, gc1, gc2;
- int cob;
- int nglen;
- int len = strlen( seq[0] );
- float score;
+ acjmprev = ac[jm].prev;
+ acjmnext = ac[jm].next;
+ ac[acjmprev].next = acjmnext;
+ if( acjmnext != -1 )
+ ac[acjmnext].prev = acjmprev;
- score = 0;
- nglen = 0;
- for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
- {
- double efficient = eff[i][j];
- gc1 = 0;
- gc2 = 0;
- for( k=0; k<len; k++ )
+		treetmp = realloc( treetmp, strlen( tree[im] ) + strlen( tree[jm] ) + 100 ); // 22 extra bytes would cover "(:%7,:%7)"; a %7 field may also carry a minus sign, so 100 is used as slack
+		if( !treetmp )
{
- gb1 = gc1;
- gb2 = gc2;
-
- gc1 = ( seq[i][k] == '-' );
- gc2 = ( seq[j][k] == '-' );
-
- cob =
- !gb1 * gc1
- * !gb2 * !gc2
-
- + gb1 * !gc1
- * !gb2 * !gc2
+ reporterr( "Cannot allocate treetmp\n" );
+ exit( 1 );
+ }
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+ free( tree[im] );
+ free( tree[jm] );
+ tree[im] = calloc( strlen( treetmp )+1, sizeof( char ) );
+ tree[jm] = NULL;
+ if( tree[im] == NULL )
+ {
+ reporterr( "Cannot reallocate tree!\n" );
+ exit( 1 );
+ }
+ strcpy( tree[im], treetmp );
- + !gb1 * !gc1
- * !gb2 * gc2
+// sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] );
+// strcpy( tree[im], treetmp );
- + !gb1 * !gc1
- * gb2 * !gc2
+#if 0
+ fprintf( stdout, "STEP-%03d:\n", k+1 );
+ fprintf( stdout, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
+ fprintf( stdout, "\n" );
+ fprintf( stdout, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
+ fprintf( stdout, "\n" );
+#endif
+ }
+ fclose( fp );
- + !gb1 * gc1
- * gb2 * !gc2
- + gb1 * !gc1
- * !gb2 * gc2
-
- + gb1 * !gc1
- * gb2 * gc2
+	fp = fopen( "infile.tree", "w" ); // write the assembled tree string
+	if( !fp ) { reporterr( "cannot open infile.tree\n" ); exit( 1 ); } // fprintf/fclose on a NULL stream would crash; handled like the _guidetree open failure
+	fprintf( fp, "%s\n", treetmp );
+	fclose( fp );
- + gb1 * gc1
- * gb2 * !gc2
-
- + !gb1 * gc1
- * gb2 * gc2
+#if 1
+ reporterr( "\n" );
+ free( hist );
+ free( (char *)ac );
+ FreeCharMtx( tree );
+ free( treetmp );
+ free( nametmp );
+#endif
- + gb1 * gc1
- * !gb2 * gc2
- ;
- score += 0.5 * (float)cob * penalty * efficient;
- score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
- nglen += ( !gc1 * !gc2 );
+#if 0
+// reporterr( "reconstructing eff[][]\n" ); // Tsune ni hat2 ha aru node koreha iranai.
+ for( k=0; k<nseq; k++ ) for( i=0; i<nseq; i++ ) eff[i][k] = 0.0;
+ for( k=0; k<nseq-1; k++ )
+ {
+ reporterr( "len[k][0], len[k][1] = %f, %f\n", len[k][0], len[k][1] );
+ for( i=0; (im=topol[k][0][i])>-1; i++ )
+ {
+ reporterr( " %03d", im );
+ }
+ fprintf( stdout, "\n" );
+ for( i=0; (jm=topol[k][1][i])>-1; i++ )
+ {
+ reporterr( " %03d", jm );
+ }
+ for( i=0; (im=topol[k][0][i])>-1; i++ ) for( j=0; (jm=topol[k][1][j])>-1; j++ )
+ {
+ eff[im][jm] += len[k][0] + len[k][1];
+ eff[jm][im] += len[k][0] + len[k][1];
}
}
- return( (float)score / nglen + 400.0 * !scoremtx );
+#endif
}
-
-float score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */
+#if 0
+void veryfastsupg_double( int nseq, double **eff, int ***topol, double **len )
{
- int i, j, k;
- int gb1, gb2, gc1, gc2;
- int cob;
- int nglen;
- int len = strlen( seq[0] );
- float score;
-
- score = 0;
- nglen = 0;
- for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt, *intpt2;
+ double tmpdouble;
+ double eff1, eff0;
+ static double *tmptmplen = NULL;
+ static int *hist = NULL;
+ static Achain *ac = NULL;
+ double minscore;
+ int im = -1, jm = -1;
+ int prevnode, acjmnext, acjmprev;
+ int *pt1, *pt2, *pt11, *pt22;
+ if( !hist )
{
- double efficient = eff[i][j];
-
- gc1 = 0;
- gc2 = 0;
- for( k=0; k<len; k++ )
- {
- gb1 = gc1;
- gb2 = gc2;
-
- gc1 = ( seq[i][k] == '-' );
- gc2 = ( seq[j][k] == '-' );
-
- cob =
- !gb1 * gc1
- * !gb2 * !gc2
+ hist = AllocateIntVec( njob );
+ tmptmplen = (double *)malloc( njob * sizeof( double ) );
+ ac = (Achain *)malloc( njob * sizeof( Achain ) );
+ }
+
+ for( i=0; i<nseq; i++ )
+ {
+ ac[i].next = i+1;
+ ac[i].prev = i-1;
+// ac[i].curr = i;
+ }
+ ac[nseq-1].next = -1;
- + gb1 * !gc1
- * !gb2 * !gc2
+ for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ ) hist[i] = -1;
- + !gb1 * !gc1
- * !gb2 * gc2
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ )
+ {
+ if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
- + !gb1 * !gc1
- * gb2 * !gc2
+ minscore = 99999.9;
+ for( i=0; ac[i].next!=-1; i=ac[i].next )
+ {
+ for( j=ac[i].next; j!=-1; j=ac[j].next )
+ {
+ tmpdouble = eff[i][j];
+ if( tmpdouble < minscore )
+ {
+ minscore = tmpdouble;
+ im = i; jm = j;
+ }
+ }
+ }
- + !gb1 * gc1
- * gb2 * !gc2
+// reporterr( "im=%d, jm=%d\n", im, jm );
- + gb1 * !gc1
- * !gb2 * gc2
-
-#if 0
- + gb1 * !gc1
- * gb2 * gc2
-
- + gb1 * gc1
- * gb2 * !gc2
-
- + !gb1 * gc1
- * gb2 * gc2
-
- + gb1 * gc1
- * !gb2 * gc2
-#endif
- ;
- score += 0.5 * (float)cob * penalty * efficient;
- score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]] * (float)efficient;
- nglen += ( !gc1 * !gc2 );
+ intpt = topol[k][0];
+ prevnode = hist[im];
+ if( prevnode == -1 )
+ {
+ *intpt++ = im;
+ *intpt = -1;
}
- }
- return( (float)score / nglen + 400.0 );
-}
-
-double score_calc_for_score_s( int s, char **seq ) /* algorithm S */
-{
- int i, j, k;
- int gb1, gb2, gc1, gc2;
- int cob;
- int nglen;
- int len = strlen( seq[0] );
- float score;
-
- score = 0;
- nglen = 0;
- for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
- {
-
- gc1 = 0;
- gc2 = 0;
- for( k=0; k<len; k++ )
+ else
{
- gb1 = gc1;
- gb2 = gc2;
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
+ }
- gc1 = ( seq[i][k] == '-' );
- gc2 = ( seq[j][k] == '-' );
-
- cob =
- !gb1 * gc1
- * !gb2 * !gc2
+ intpt = topol[k][1];
+ prevnode = hist[jm];
+ if( prevnode == -1 )
+ {
+ *intpt++ = jm;
+ *intpt = -1;
+ }
+ else
+ {
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
+ }
- + gb1 * !gc1
- * !gb2 * !gc2
+ minscore *= 0.5;
- + !gb1 * !gc1
- * !gb2 * gc2
+ len[k][0] = minscore - tmptmplen[im];
+ len[k][1] = minscore - tmptmplen[jm];
- + !gb1 * !gc1
- * gb2 * !gc2
+ tmptmplen[im] = minscore;
- + !gb1 * gc1
- * gb2 * !gc2
+ hist[im] = k;
- + gb1 * !gc1
- * !gb2 * gc2
-
+ for( i=0; i!=-1; i=ac[i].next )
+ {
+ if( i != im && i != jm )
+ {
+ if( i < im )
+ {
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
+ }
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
+ }
+ eff0 = eff[miniim][maxiim];
+ eff1 = eff[minijm][maxijm];
+ eff[miniim][maxiim] =
+ MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) +
+ ( eff0 + eff1 ) * 0.5 * sueff_global;
+ }
+ }
+ acjmprev = ac[jm].prev;
+ acjmnext = ac[jm].next;
+ ac[acjmprev].next = acjmnext;
+ if( acjmnext != -1 )
+ ac[acjmnext].prev = acjmprev;
#if 0
- + gb1 * !gc1
- * gb2 * gc2
-
- + gb1 * gc1
- * gb2 * !gc2
-
- + !gb1 * gc1
- * gb2 * gc2
-
- + gb1 * gc1
- * !gb2 * gc2
+ fprintf( stdout, "STEP-%03d:\n", k+1 );
+ fprintf( stdout, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
+ fprintf( stdout, "\n" );
+ fprintf( stdout, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
+ fprintf( stdout, "\n" );
#endif
- ;
- score += 0.5 * (float)cob * penalty;
- score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
- nglen += ( !gc1 * !gc2 );
- }
-#if 0
- fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
- fprintf( stderr, "score = %f\n", score );
+ }
+#if 1
+ reporterr( "\n" );
+ free( (void *)tmptmplen ); tmptmplen = NULL;
+ free( hist ); hist = NULL;
+ free( (char *)ac ); ac = NULL;
#endif
- }
- return( (double)score / nglen + 400.0 );
}
+#endif
-double SSPscore___( int s, char **seq, int ex ) /* algorithm S */
+void veryfastsupg_double_outtree( int nseq, double **eff, int ***topol, double **len, char **name ) // not used; merges the closest pair in eff nseq-1 times, fills topol/len and writes the parenthesized tree string to "infile.tree"
{
- int i, j, k;
- int gb1, gb2, gc1, gc2;
- int cob;
- int nglen;
- int len = strlen( seq[0] );
- float score;
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt, *intpt2;
+ double tmpdouble;
+ double eff1, eff0;
+ static double *tmptmplen = NULL; // tmptmplen[c]: height at which cluster c was last merged (0.0 for a single item)
+ static int *hist = NULL; // hist[c]: step index of cluster c's most recent merge, -1 while c is still a single item
+ static Achain *ac = NULL; // index-linked chain (next/prev) of the clusters that are still active
+ double minscore;
+ static char **tree; // tree[c]: tree string built so far for cluster c
+ static char *treetmp;
+ static char *nametmp;
+ FILE *fpout;
+ int im = -1, jm = -1;
+ int prevnode, acjmnext, acjmprev;
+ int *pt1, *pt2, *pt11, *pt22;
+ double (*clusterfuncpt[1])(double,double); // single-slot table holding the distance-update callback chosen from treemethod
- score = 0;
- nglen = 0;
- i=ex; for( j=0; j<s; j++ )
+
+ sueff1 = 1 - sueff_global; // sueff1/sueff05 are not declared in this function; presumably file-level weights read by the cluster_* callbacks - TODO confirm
+ sueff05 = sueff_global * 0.5;
+ if ( treemethod == 'X' )
+ clusterfuncpt[0] = cluster_mix_double;
+ else if ( treemethod == 'E' )
+ clusterfuncpt[0] = cluster_average_double;
+ else if ( treemethod == 'q' )
+ clusterfuncpt[0] = cluster_minimum_double;
+ else
{
+ reporterr( "Unknown treemethod, %c\n", treemethod );
+ exit( 1 );
+ }
- if( j == ex ) continue;
+ if( !hist ) // work buffers are allocated whenever hist is NULL; they are all released (and hist reset) at the end of the call
+ {
+ treetmp = AllocateCharVec( njob*50 ); // NOTE(review): sized by the global njob while the loops use nseq; the sprintf calls below do not check this budget
+ tree = AllocateCharMtx( njob, njob*50 );
+ hist = AllocateIntVec( njob );
+ tmptmplen = (double *)malloc( njob * sizeof( double ) );
+ ac = (Achain *)malloc( njob * sizeof( Achain ) );
+ nametmp = AllocateCharVec( 31 );
+ }
- gc1 = 0;
- gc2 = 0;
- for( k=0; k<len; k++ )
+// for( i=0; i<nseq; i++ ) sprintf( tree[i], "%d", i+1 );
+ for( i=0; i<nseq; i++ ) // build one leaf label per input name
+ {
+ for( j=0; j<30; j++ ) nametmp[j] = 0;
+ for( j=0; j<30; j++ ) // reads 30 chars of name[i] without stopping at its terminator - assumes name[i] has at least 30 readable bytes (TODO confirm)
{
- gb1 = gc1;
- gb2 = gc2;
+ if( isalnum( (unsigned char)name[i][j] ) ) // cast: passing a negative char value to isalnum() is undefined behaviour
+ nametmp[j] = name[i][j];
+ else
+ nametmp[j] = '_'; // every non-alphanumeric character (including a terminator) becomes '_'
+ }
+ nametmp[30] = 0;
+ sprintf( tree[i], "%d_%.20s", i+1, nametmp+1 ); // label = 1-based index, '_', then up to 20 sanitized chars; nametmp+1 skips the first character of name[i]
+ }
+
+ for( i=0; i<nseq; i++ ) // chain all items in index order
+ {
+ ac[i].next = i+1;
+ ac[i].prev = i-1;
+// ac[i].curr = i;
+ }
+ ac[nseq-1].next = -1; // -1 terminates the chain
- gc1 = ( seq[i][k] == '-' );
- gc2 = ( seq[j][k] == '-' );
-
- cob =
- !gb1 * gc1
- * !gb2 * !gc2
+ for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ ) hist[i] = -1;
- + gb1 * !gc1
- * !gb2 * !gc2
-
- + !gb1 * !gc1
- * !gb2 * gc2
-
- + !gb1 * !gc1
- * gb2 * !gc2
-
- + !gb1 * gc1
- * gb2 * !gc2 * 2.0
-
- + gb1 * !gc1
- * !gb2 * gc2 * 2.0
-
-#if 0
- + gb1 * !gc1
- * gb2 * gc2
-
- + gb1 * gc1
- * gb2 * !gc2
-
- + !gb1 * gc1
- * gb2 * gc2
-
- + gb1 * gc1
- * !gb2 * gc2
-#endif
- ;
- score += 0.5 * (float)cob * penalty;
- score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
- nglen += ( !gc1 * !gc2 ); /* tsukawanai */
- }
-#if 0
- fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
- fprintf( stderr, "score = %f\n", score );
-#endif
- }
- return( (double)score );
-}
-
-double SSPscore( int s, char **seq ) /* algorithm S */
-{
- int i, j, k;
- int gb1, gb2, gc1, gc2;
- int cob;
- int nglen;
- int len = strlen( seq[0] );
- float score;
-
- score = 0;
- nglen = 0;
- for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
- {
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ ) // one merge per step
+ {
+ if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
- gc1 = 0;
- gc2 = 0;
- for( k=0; k<len; k++ )
+ minscore = 99999.9; // sentinel; NOTE(review): if no distance is below it, im/jm keep their previous values
+ for( i=0; ac[i].next!=-1; i=ac[i].next ) // scan every active pair i<j; index 0 is never unlinked because only jm (which follows im) is removed
{
- gb1 = gc1;
- gb2 = gc2;
-
- gc1 = ( seq[i][k] == '-' );
- gc2 = ( seq[j][k] == '-' );
-
- cob =
- !gb1 * gc1
- * !gb2 * !gc2
-
- + gb1 * !gc1
- * !gb2 * !gc2
-
- + !gb1 * !gc1
- * !gb2 * gc2
-
- + !gb1 * !gc1
- * gb2 * !gc2
-
- + !gb1 * gc1
- * gb2 * !gc2
-
- + gb1 * !gc1
- * !gb2 * gc2
-
-#if 0
- + gb1 * !gc1
- * gb2 * gc2
-
- + gb1 * gc1
- * gb2 * !gc2
-
- + !gb1 * gc1
- * gb2 * gc2
-
- + gb1 * gc1
- * !gb2 * gc2
-#endif
- ;
- score += 0.5 * (float)cob * penalty;
- score += (float)amino_dis[(int)seq[i][k]][(int)seq[j][k]];
- nglen += ( !gc1 * !gc2 ); /* tsukawanai */
+ for( j=ac[i].next; j!=-1; j=ac[j].next )
+ {
+ tmpdouble = eff[i][j];
+ if( tmpdouble < minscore )
+ {
+ minscore = tmpdouble;
+ im = i; jm = j; // closest pair so far; im precedes jm in the chain
+ }
+ }
}
-#if 0
- fprintf( stderr, "i = %d, j=%d\n", i+1, j+1 );
- fprintf( stderr, "score = %f\n", score );
-#endif
- }
- return( (double)score );
-}
-
-
-
-double DSPscore( int s, char **seq ) /* method 3 deha nai */
-{
- int i, j, k;
- double c;
- int len = strlen( seq[0] );
- double score;
- double tmpscore;
- char *mseq1, *mseq2;
-#if DEBUG
- FILE *fp;
-#endif
-
- score = 0.0;
- c = 0.0;
-
- for( i=0; i<s-1; i++ )
- {
- for( j=i+1; j<s; j++ )
- {
- mseq1 = seq[i];
- mseq2 = seq[j];
- tmpscore = 0.0;
- for( k=0; k<len; k++ )
- {
- if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
-
- if( mseq1[k] == '-' )
- {
- tmpscore += penalty;
- while( mseq1[++k] == '-' )
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
- k--;
- if( k > len-2 ) break;
- continue;
- }
- if( mseq2[k] == '-' )
- {
- tmpscore += penalty;
- while( mseq2[++k] == '-' )
- tmpscore += amino_dis[(int)mseq1[k]][(int)mseq2[k]];
- k--;
- if( k > len-2 ) break;
- continue;
- }
- }
- score += (double)tmpscore;
- }
- }
-
- return( score );
-}
+// reporterr( "im=%d, jm=%d\n", im, jm );
-#define SEGMENTSIZE 150
-
-int searchAnchors( int nseq, char **seq, Segment *seg )
-{
- int i, j, k, kcyc;
- int status;
- double score;
- int value = 0;
- int len;
- int length;
- static double *stra = NULL;
- static int alloclen = 0;
- double cumscore;
- static double threshold;
-
- len = strlen( seq[0] );
- if( alloclen < len )
- {
- if( alloclen )
+ intpt = topol[k][0]; // topol[k][0]: -1 terminated member list of the first merged cluster (im)
+ prevnode = hist[im];
+ if( prevnode == -1 ) // im is still a single item
{
- FreeDoubleVec( stra );
+ *intpt++ = im;
+ *intpt = -1;
}
else
{
- threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
+ pt1 = topol[prevnode][0]; // im was formed at step prevnode: its members are both lists of that step
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 ) // copy the list with the smaller first member first
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
- stra = AllocateDoubleVec( len );
- alloclen = len;
- }
- for( i=0; i<len; i++ )
- {
-#if 0
- /* make prf */
- for( j=0; j<26; j++ )
+ intpt = topol[k][1]; // topol[k][1]: same construction for the second merged cluster (jm)
+ prevnode = hist[jm];
+ if( prevnode == -1 )
{
- prf[j] = 0.0;
+ *intpt++ = jm;
+ *intpt = -1;
}
- for( j=0; j<nseq; j++ ) prf[amino_n[seq[j][i]]] += 1.0;
-
- /* make hat */
- pre = 26;
- for( j=25; j>=0; j-- )
+ else
{
- if( prf[j] )
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
+ {
+ pt11 = pt2;
+ pt22 = pt1;
+ }
+ else
{
- hat[pre] = j;
- pre = j;
+ pt11 = pt1;
+ pt22 = pt2;
}
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
- hat[pre] = -1;
- /* make site score */
- stra[i] = 0.0;
- for( k=hat[26]; k!=-1; k=hat[k] )
- for( j=hat[26]; j!=-1; j=hat[j] )
- stra[i] += n_dis[k][j] * prf[k] * prf[j];
-#else
- stra[i] = 0.0;
- kcyc = nseq-1;
- for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ )
- stra[i] += n_dis[(int)amino_n[(int)seq[k][i]]][(int)amino_n[(int)seq[j][i]]];
- stra[i] /= (double)nseq * ( nseq-1 ) / 2;
-#endif
- }
+ minscore *= 0.5; // height of the new node = half of the merged distance
- (seg+0)->skipForeward = 0;
- (seg+1)->skipBackward = 0;
- status = 0;
- cumscore = 0.0;
- score = 0.0;
- length = 0; /* modified at 01/09/11 */
- for( j=0; j<divWinSize; j++ ) score += stra[j];
- for( i=1; i<len-divWinSize; i++ )
- {
- score = score - stra[i-1] + stra[i+divWinSize-1];
-#if DEBUG
- fprintf( stderr, "%d %f ? %f", i, score, threshold );
- if( score > threshold ) fprintf( stderr, "YES\n" );
- else fprintf( stderr, "NO\n" );
-#endif
+ len[k][0] = minscore - tmptmplen[im]; // branch length = new height minus the height already reached by that child
+ len[k][1] = minscore - tmptmplen[jm];
- if( score > threshold )
- {
- if( !status )
- {
- status = 1;
- seg->start = i;
- length = 0;
- cumscore = 0.0;
- }
- length++;
- cumscore += score;
- }
- if( score <= threshold || length > SEGMENTSIZE )
- {
- if( status )
- {
- seg->end = i;
- seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
- seg->score = cumscore;
-#if DEBUG
- fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
-#endif
- if( length > SEGMENTSIZE )
+ tmptmplen[im] = minscore; // the merged cluster keeps index im
+
+ hist[im] = k;
+
+ for( i=0; i!=-1; i=ac[i].next ) // refresh the distance from the merged cluster to every other active cluster
+ {
+ if( i != im && i != jm )
+ {
+ if( i < im ) // eff is read as an upper triangle (row < column), so order each index pair
{
- (seg+0)->skipForeward = 1;
- (seg+1)->skipBackward = 1;
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
+ }
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
}
else
{
- (seg+0)->skipForeward = 0;
- (seg+1)->skipBackward = 0;
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
}
- length = 0;
- cumscore = 0.0;
- status = 0;
- value++;
- seg++;
- if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
- }
- }
- }
- if( status )
- {
- seg->end = i;
- seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
- seg->score = cumscore;
-#if DEBUG
-fprintf( stderr, "%d-%d length = %d\n", seg->start, seg->end, length );
+ eff0 = eff[miniim][maxiim];
+ eff1 = eff[minijm][maxijm];
+ eff[miniim][maxiim] =
+ (clusterfuncpt[0])( eff0, eff1 ); // combined distance goes into the im slot; the jm slot is no longer visited once jm is unlinked
+ }
+ }
+ acjmprev = ac[jm].prev; // unlink jm from the active chain
+ acjmnext = ac[jm].next;
+ ac[acjmprev].next = acjmnext;
+ if( acjmnext != -1 ) // jm may be the tail of the chain
+ ac[acjmnext].prev = acjmprev;
+
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], len[k][0], tree[jm], len[k][1] ); // join both subtree strings with their branch lengths
+ strcpy( tree[im], treetmp );
+#if 0
+ fprintf( stdout, "STEP-%03d:\n", k+1 );
+ fprintf( stdout, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
+ fprintf( stdout, "\n" );
+ fprintf( stdout, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
+ fprintf( stdout, "\n" );
+#endif
+ }
+ fpout = fopen( "infile.tree", "w" );
+ if( !fpout ) { reporterr( "Cannot open infile.tree\n" ); exit( 1 ); } // fail loudly instead of passing NULL to fprintf/fclose
+ fprintf( fpout, "%s\n", treetmp ); // NOTE(review): treetmp is only written inside the merge loop, so nseq < 2 prints whatever the allocator left there
+ fclose( fpout );
+#if 1
+ reporterr( "\n" );
+ free( (void *)tmptmplen ); tmptmplen = NULL;
+ free( hist ); hist = NULL; // resetting hist makes the next call allocate every buffer again
+ free( (char *)ac ); ac = NULL;
+ FreeCharMtx( tree );
+ free( treetmp );
+ free( nametmp );
#endif
- value++;
- }
- return( value );
}
-void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
+void veryfastsupg( int nseq, double **oeff, int ***topol, double **len ) // merges the closest pair nseq-1 times on an integer-scaled copy of oeff; fills topol (member lists) and len (branch lengths)
{
- int i, j;
- LocalHom *ptr;
- static int *nogaplen = NULL;
-
- if( nogaplen == NULL )
- {
- nogaplen = AllocateIntVec( nseq );
- }
-
- for( i=0; i<nseq; i++ )
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+ int *intpt, *intpt2;
+ int tmpint;
+ int eff1, eff0;
+ static double *tmptmplen = NULL; // tmptmplen[c]: height at which cluster c was last merged (0.0 for a single item)
+ static int **eff = NULL; // integer working copy of oeff, scaled by INTMTXSCALE
+ static int *hist = NULL; // hist[c]: step index of cluster c's most recent merge, -1 while c is still a single item
+ static Achain *ac = NULL; // index-linked chain (next/prev) of the clusters that are still active
+ int minscore;
+ double minscoref;
+ int im = -1, jm = -1;
+ int prevnode, acjmnext, acjmprev;
+ int *pt1, *pt2, *pt11, *pt22;
+ if( !eff ) // buffers are allocated whenever eff is NULL; they are released (and eff reset) at the end of the call
{
- nogaplen[i] = seqlen( seq[i] );
-// fprintf( stderr, "nogaplen[%d] = %d\n", i, nogaplen[i] );
+ eff = AllocateIntMtx( njob, njob ); // NOTE(review): sized by the global njob while the loops below use nseq
+ hist = AllocateIntVec( njob );
+ tmptmplen = (double *)malloc( njob * sizeof( double ) );
+ ac = (Achain *)malloc( njob * sizeof( Achain ) );
}
-
- for( i=0; i<nseq; i++ )
+
+ for( i=0; i<nseq; i++ )
{
- for( j=0; j<nseq; j++ )
+ for( j=0; j<nseq; j++ )
{
- for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
- {
-// fprintf( stderr, "i,j=%d,%d,ptr=%p\n", i, j, ptr );
-#if 1
- ptr->importance = ptr->opt / ptr->overlapaa;
- ptr->fimportance = (float)ptr->importance;
-#else
- ptr->importance = ptr->opt / MIN( nogaplen[i], nogaplen[j] );
-#endif
- }
+ eff[i][j] = (int)( oeff[i][j] * INTMTXSCALE + 0.5 ); // scale, then add 0.5 before truncation (rounds to nearest for non-negative distances)
}
}
-}
-
-void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
-{
- int i, j, pos, len;
- static double *importance;
- double tmpdouble;
- static int *nogaplen = NULL;
- LocalHom *tmpptr;
- if( importance == NULL )
+ for( i=0; i<nseq; i++ ) // chain all items in index order
{
- importance = AllocateDoubleVec( nlenmax );
- nogaplen = AllocateIntVec( nseq );
+ ac[i].next = i+1;
+ ac[i].prev = i-1;
+// ac[i].curr = i;
}
+ ac[nseq-1].next = -1; // -1 terminates the chain
+ for( i=0; i<nseq; i++ ) tmptmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ ) hist[i] = -1;
- for( i=0; i<nseq; i++ )
- {
- nogaplen[i] = seqlen( seq[i] );
-// fprintf( stderr, "nogaplen[] = %d\n", nogaplen[i] );
- }
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ ) // one merge per step
+ {
+ if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
-#if 0
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
- {
- tmpptr = localhom[i]+j;
- fprintf( stderr, "%d-%d\n", i, j );
- do
+ minscore = INTMTXSCALE*4; // sentinel: only scaled distances below 4*INTMTXSCALE can be selected; NOTE(review): otherwise im/jm keep their previous values
+ for( i=0; ac[i].next!=-1; i=ac[i].next ) // scan every active pair i<j; index 0 is never unlinked because only jm (which follows im) is removed
{
- fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt );
- } while( tmpptr=tmpptr->next );
- }
-#endif
+ for( j=ac[i].next; j!=-1; j=ac[j].next )
+ {
+ tmpint = eff[i][j];
+ if( tmpint < minscore )
+ {
+ minscore = tmpint;
+ im = i; jm = j; // closest pair so far; im precedes jm in the chain
+ }
+ }
+ }
+ minscoref = (double)minscore * 0.5 / ( INTMTXSCALE ); // undo the scaling and halve: height of the new node
+// reporterr( "im=%d, jm=%d\n", im, jm );
- for( i=0; i<nseq; i++ )
- {
-// fprintf( stderr, "i = %d\n", i );
- for( pos=0; pos<nlenmax; pos++ )
- importance[pos] = 0.0;
- for( j=0; j<nseq; j++ )
- {
- if( i == j ) continue;
- tmpptr = localhom[i]+j;
- for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
- {
- if( tmpptr->opt == -1 ) continue;
- for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
#if 1
- importance[pos] += eff[j];
-#else
- importance[pos] += eff[j] * tmpptr->opt / MIN( nogaplen[i], nogaplen[j] );
- importance[pos] += eff[j] * tmpptr->opt / tmpptr->overlapaa;
-#endif
- }
+ intpt = topol[k][0]; // topol[k][0]: -1 terminated member list of the first merged cluster (im)
+ prevnode = hist[im];
+ if( prevnode == -1 ) // im is still a single item
+ {
+ *intpt++ = im;
+ *intpt = -1;
}
-#if 0
- fprintf( stderr, "position specific importance of seq %d:\n", i );
- for( pos=0; pos<nlenmax; pos++ )
- fprintf( stderr, "%d: %f\n", pos, importance[pos] );
- fprintf( stderr, "\n" );
-#endif
- for( j=0; j<nseq; j++ )
+ else
{
-// fprintf( stderr, "i=%d, j=%d\n", i, j );
- if( i == j ) continue;
- if( localhom[i][j].opt == -1.0 ) continue;
-#if 1
- for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
+ pt1 = topol[prevnode][0]; // im was formed at step prevnode: its members are both lists of that step
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 ) // copy the list with the smaller first member first
{
- if( tmpptr->opt == -1.0 ) continue;
- tmpdouble = 0.0;
- len = 0;
- for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
- {
- tmpdouble += importance[pos];
- len++;
- }
- tmpdouble /= (double)len;
-
- tmpptr->importance = tmpdouble * tmpptr->opt;
- tmpptr->fimportance = (float)tmpptr->importance;
+ pt11 = pt2;
+ pt22 = pt1;
}
-#else
- tmpdouble = 0.0;
- len = 0;
- for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
+ else
{
- if( tmpptr->opt == -1.0 ) continue;
- for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
- {
- tmpdouble += importance[pos];
- len++;
- }
+ pt11 = pt1;
+ pt22 = pt2;
}
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
+ }
- tmpdouble /= (double)len;
-
- for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
- {
- if( tmpptr->opt == -1.0 ) continue;
- tmpptr->importance = tmpdouble * tmpptr->opt;
-// tmpptr->importance = tmpptr->opt / tmpptr->overlapaa; //\e$B$J$+$C$?$3$H$K$9$k\e(B
- }
-#endif
-
-// fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
- }
- }
-
-#if 0
- fprintf( stderr, "before averaging:\n" );
-
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
- {
- fprintf( stderr, "%d-%d\n", i, j );
- for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
+ intpt = topol[k][1]; // topol[k][1]: same construction for the second merged cluster (jm)
+ prevnode = hist[jm];
+ if( prevnode == -1 )
{
- fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, eff[i] * tmpptr->importance, tmpptr->opt );
+ *intpt++ = jm;
+ *intpt = -1;
}
- }
-#endif
-
-#if 1
-// fprintf( stderr, "average?\n" );
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
- {
- double imp;
- LocalHom *tmpptr1, *tmpptr2;
-
-// fprintf( stderr, "i=%d, j=%d\n", i, j );
-
- tmpptr1 = localhom[i]+j; tmpptr2 = localhom[j]+i;
- for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
+ else
{
- if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 )
+ pt1 = topol[prevnode][0];
+ pt2 = topol[prevnode][1];
+ if( *pt1 > *pt2 )
{
-// fprintf( stderr, "WARNING: i=%d, j=%d, tmpptr1->opt=%f, tmpptr2->opt=%f\n", i, j, tmpptr1->opt, tmpptr2->opt );
- continue;
+ pt11 = pt2;
+ pt22 = pt1;
}
-// fprintf( stderr, "## importances = %f, %f\n", tmpptr1->importance, tmpptr2->importance );
- imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
- tmpptr1->importance = tmpptr2->importance = imp;
- tmpptr1->fimportance = tmpptr2->fimportance = (float)imp;
-
-// fprintf( stderr, "## importance = %f\n", tmpptr1->importance );
-
- }
-
-#if 1
- if( ( tmpptr1 && !tmpptr2 ) || ( !tmpptr1 && tmpptr2 ) )
- {
- fprintf( stderr, "ERROR: i=%d, j=%d\n", i, j );
- exit( 1 );
+ else
+ {
+ pt11 = pt1;
+ pt22 = pt2;
+ }
+ for( intpt2=pt11; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ for( intpt2=pt22; *intpt2!=-1; )
+ *intpt++ = *intpt2++;
+ *intpt = -1;
}
-#endif
- }
-#endif
-#if 0
- fprintf( stderr, "after averaging:\n" );
+#else
+ intpt = topol[k][0]; // disabled alternative (the #if 1 branch above is the one compiled); pair is not declared in this function
+ for( i=0; i<nseq; i++ )
+ if( pair[im][i] > -2 )
+ *intpt++ = i;
+ *intpt = -1;
- for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
- {
- fprintf( stderr, "%d-%d\n", i, j );
- for( tmpptr = localhom[i]+j; tmpptr; tmpptr=tmpptr->next )
- {
- fprintf( stderr, "reg1=%d-%d, reg2=%d-%d, imp=%f -> %f opt=%f\n", tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->opt / tmpptr->overlapaa, tmpptr->importance, tmpptr->opt );
- }
- }
+ intpt = topol[k][1];
+ for( i=0; i<nseq; i++ )
+ if( pair[jm][i] > -2 )
+ *intpt++ = i;
+ *intpt = -1;
#endif
-}
-
-
-#if 0
-void weightimportance( int nseq, double **eff, LocalHom **localhom )
-{
- int i, j, pos, len;
- static double *importance;
- double tmpdouble;
- LocalHom *tmpptr, *tmpptr1, *tmpptr2;
- if( importance == NULL )
- importance = AllocateDoubleVec( nlenmax );
+ len[k][0] = minscoref - tmptmplen[im]; // branch length = new height minus the height already reached by that child
+ len[k][1] = minscoref - tmptmplen[jm];
- fprintf( stderr, "effmtx = :\n" );
- for( i=0; i<nseq; i++ )
- {
- for( j=0; j<nseq; j++ )
- {
- fprintf( stderr, "%6.3f ", eff[i][j] );
- }
- fprintf( stderr, "\n" );
- }
- for( i=0; i<nseq; i++ )
- {
- for( pos=0; pos<nlenmax; pos++ )
- importance[pos] = 0.0;
- for( j=0; j<nseq; j++ )
- {
+ tmptmplen[im] = minscoref; // the merged cluster keeps index im
- if( i == j ) continue;
- tmpptr = localhom[i]+j;
- while( 1 )
- {
- fprintf( stderr, "i=%d, j=%d\n", i, j );
- for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
-// importance[pos] += eff[i][j] * tmpptr->importance;
- importance[pos] += eff[i][j] / (double)nseq * tmpptr->importance / 1.0;
- fprintf( stderr, "eff[][] = %f, localhom[i][j].importance = %f \n", eff[i][j], tmpptr->importance );
- tmpptr = tmpptr->next;
- if( tmpptr == NULL ) break;
- }
+ hist[im] = k;
- }
-#if 0
- fprintf( stderr, "position specific importance of seq %d:\n", i );
- for( pos=0; pos<nlenmax; pos++ )
- fprintf( stderr, "%d: %f\n", pos, importance[pos] );
- fprintf( stderr, "\n" );
-#endif
- for( j=0; j<nseq; j++ )
- {
- fprintf( stderr, "i=%d, j=%d\n", i, j );
- if( i == j ) continue;
- tmpptr = localhom[i]+j;
- do
- {
- tmpdouble = 0.0;
- len = 0;
- for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
+ for( i=0; i!=-1; i=ac[i].next ) // refresh the distance from the merged cluster to every other active cluster
+ {
+ if( i != im && i != jm )
+ {
+ if( i < im ) // eff is read as an upper triangle (row < column), so order each index pair
{
- tmpdouble += importance[pos];
- len++;
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
}
- tmpdouble /= (double)len;
- tmpptr->importance = tmpdouble;
- fprintf( stderr, "importance of match between %d - %d = %f\n", i, j, tmpdouble );
- tmpptr = tmpptr->next;
- } while( tmpptr );
- }
- }
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
+ }
+ eff0 = eff[miniim][maxiim];
+ eff1 = eff[minijm][maxijm];
+ eff[miniim][maxiim] =
+ MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + // int?? - blend of the smaller distance and the mean, weighted by sueff_global
+ ( eff0 + eff1 ) * 0.5 * sueff_global; // int?? - NOTE(review): the double result is truncated when stored into the int matrix
+ }
+ }
+ acjmprev = ac[jm].prev; // unlink jm from the active chain
+ acjmnext = ac[jm].next;
+ ac[acjmprev].next = acjmnext;
+ if( acjmnext != -1 ) // jm may be the tail of the chain
+ ac[acjmnext].prev = acjmprev;
+#if 0
+ fprintf( stdout, "STEP-%03d:\n", k+1 );
+ fprintf( stdout, "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][0][i] );
+ fprintf( stdout, "\n" );
+ fprintf( stdout, "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) fprintf( stdout, " %03d", topol[k][1][i] );
+ fprintf( stdout, "\n" );
+#endif
+ }
#if 1
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
- {
- fprintf( stderr, "i = %d, j=%d\n", i, j );
- tmpptr1 = localhom[i]+j;
- tmpptr2 = localhom[j]+i;
- while( tmpptr1 && tmpptr2 )
- {
- tmpptr1->importance += tmpptr2->importance;
- tmpptr1->importance *= 0.5;
- tmpptr2->importance *= tmpptr1->importance;
- fprintf( stderr, "%d-%d: s1=%d, e1=%d, s2=%d, e2=%d, importance=%f\n", i, j, tmpptr1->start1, tmpptr1->end1, tmpptr1->start2, tmpptr1->end2, tmpptr1->importance );
- tmpptr1 = tmpptr1->next;
- tmpptr2 = tmpptr2->next;
- fprintf( stderr, "tmpptr1 = %p, tmpptr2 = %p\n", tmpptr1, tmpptr2 );
- }
- }
+ FreeIntMtx( eff ); eff = NULL; // resetting eff makes the next call allocate every buffer again
+ free( (void *)tmptmplen ); tmptmplen = NULL;
+ free( hist ); hist = NULL;
+ free( (char *)ac ); ac = NULL;
#endif
}
-void weightimportance2( int nseq, double *eff, LocalHom **localhom )
+void fastsupg( int nseq, double **oeff, int ***topol, double **len ) // merges the closest pair nseq-1 times on a private copy of oeff; fills topol (member lists) and len (branch lengths)
{
- int i, j, pos, len;
- static double *wimportance;
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+#if 0
+ double eff[nseq][nseq];
+ char pair[njob][njob];
+#else
+ static double *tmplen; // tmplen[c]: height at which cluster c was last merged (0.0 for a single item)
+ int *intpt;
double tmpdouble;
- if( wimportance == NULL )
- wimportance = AllocateDoubleVec( nlenmax );
-
-
- fprintf( stderr, "effmtx = :\n" );
- for( i=0; i<nseq; i++ )
+ double eff1, eff0;
+ static double **eff = NULL; // working copy of oeff; only entries with row < column are read
+ static char **pair = NULL; // pair[c][i] > 0 marks item i as a member of cluster c
+ static Achain *ac; // index-linked chain (next/prev) of the clusters that are still active
+ double minscore;
+ int im = -1, jm = -1;
+ if( !eff ) // allocated on the first call only; the matching frees at the end of the function are commented out
{
- for( j=0; j<nseq; j++ )
- {
- fprintf( stderr, "%6.3f ", eff[i] * eff[j] );
- }
- fprintf( stderr, "\n" );
+ eff = AllocateFloatMtx( njob, njob ); // NOTE(review): a "Float" allocator feeds a double ** here - confirm its element type; sized by the global njob, not nseq
+ pair = AllocateCharMtx( njob, njob );
+ tmplen = AllocateFloatVec( njob );
+ ac = (Achain *)calloc( njob, sizeof( Achain ) );
}
- for( i=0; i<nseq; i++ )
- {
- fprintf( stderr, "i = %d\n", i );
- for( pos=0; pos<nlenmax; pos++ )
- wimportance[pos] = 0.0;
- for( j=0; j<nseq; j++ )
- {
- if( i == j ) continue;
- for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
-// wimportance[pos] += eff[i][j];
- wimportance[pos] += eff[i] * eff[j] / (double)nseq * localhom[i][j].importance / 1.0;
- }
-#if 0
- fprintf( stderr, "position specific wimportance of seq %d:\n", i );
- for( pos=0; pos<nlenmax; pos++ )
- fprintf( stderr, "%d: %f\n", pos, wimportance[pos] );
- fprintf( stderr, "\n" );
#endif
- for( j=0; j<nseq; j++ )
+
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<nseq; j++ )
{
- if( i == j ) continue;
- tmpdouble = 0.0;
- len = 0;
- for( pos=localhom[i][j].start1; pos<=localhom[i][j].end1; pos++ )
- {
- tmpdouble += wimportance[pos];
- len++;
- }
- tmpdouble /= (double)len;
- localhom[i][j].wimportance = tmpdouble;
- fprintf( stderr, "wimportance of match between %d - %d = %f\n", i, j, tmpdouble );
+ eff[i][j] = (double)oeff[i][j]; // the caller's matrix is left untouched
}
}
-#if 1
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
- {
- localhom[i][j].wimportance += localhom[j][i].wimportance;
- localhom[i][j].wimportance = 0.5 * ( localhom[i][j].wimportance );
- }
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
+
+ for( i=0; i<nseq; i++ ) // chain all items in index order
{
- localhom[j][i].wimportance = localhom[i][j].wimportance;
+ ac[i].next = i+1;
+ ac[i].prev = i-1;
+// ac[i].curr = i;
}
-#endif
-}
-
-void weightimportance4( int clus1, int clus2, double *eff1, double *eff2, LocalHom ***localhom )
-{
- int i, j, pos, len;
- static double *wimportance;
- LocalHom *tmpptr, *tmpptr1, *tmpptr2;
- if( wimportance == NULL )
- wimportance = AllocateDoubleVec( nlenmax );
+ ac[nseq-1].next = -1; // -1 terminates the chain
+ for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
+ for( i=0; i<nseq; i++ ) pair[i][i] = 1; // every item starts as its own cluster
-#if 0
- fprintf( stderr, "effarr1 = :\n" );
- for( i=0; i<clus1; i++ )
- fprintf( stderr, "%6.3f\n", eff1[i] );
- fprintf( stderr, "effarr2 = :\n" );
- for( i=0; i<clus2; i++ )
- fprintf( stderr, "%6.3f\n", eff2[i] );
-#endif
+ reporterr( "\n" );
+ for( k=0; k<nseq-1; k++ ) // one merge per step
+ {
+ if( k % 10 == 0 ) reporterr( "%d / %d\r", k, nseq );
- for( i=0; i<clus1; i++ )
- {
- for( j=0; j<clus2; j++ )
+ minscore = 9999.0; // sentinel; NOTE(review): if no distance is below it, im/jm keep their previous values
+ for( i=0; ac[i].next!=-1; i=ac[i].next ) // scan every active pair i<j; index 0 is never unlinked because only jm (which follows im) is removed
+// for( i=0; i<nseq-1; i++ )
{
-// fprintf( stderr, "i=%d, j=%d\n", i, j );
- tmpptr = localhom[i][j];
- do
- {
- tmpptr->wimportance = tmpptr->importance * eff1[i] * eff2[j];
- tmpptr = tmpptr->next;
- } while( tmpptr );
- }
- }
-}
+ for( j=ac[i].next; j!=-1; j=ac[j].next )
+// for( j=i+1; j<nseq; j++ )
+ {
+ tmpdouble = eff[i][j];
+ if( tmpdouble < minscore )
+ {
+ minscore = tmpdouble;
+ im = i; jm = j; // closest pair so far; im precedes jm in the chain
+ }
+ }
+ }
-static void addlocalhom_e( LocalHom *localhom, int start1, int start2, int end1, int end2, double opt )
-{
- LocalHom *tmpptr;
- tmpptr = localhom;
+// reporterr( "im=%d, jm=%d\n", im, jm );
+
+ intpt = topol[k][0]; // topol[k][0]: members of cluster im in increasing index order, -1 terminated
+ for( i=0; i<nseq; i++ )
+ if( pair[im][i] > 0 )
+ *intpt++ = i;
+ *intpt = -1;
- fprintf( stderr, "adding localhom\n" );
- while( tmpptr->next )
- tmpptr = tmpptr->next;
- fprintf( stderr, "allocating localhom\n" );
- tmpptr->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
- fprintf( stderr, "done\n" );
- tmpptr = tmpptr->next;
+ intpt = topol[k][1]; // topol[k][1]: members of cluster jm
+ for( i=0; i<nseq; i++ )
+ if( pair[jm][i] > 0 )
+ *intpt++ = i;
+ *intpt = -1;
- tmpptr->start1 = start1;
- tmpptr->start2 = start2;
- tmpptr->end1 = end1;
- tmpptr->end2 = end2;
- tmpptr->opt = opt;
+ minscore /= 2.0; // height of the new node = half of the merged distance
- fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
-}
+ len[k][0] = (double)minscore - tmplen[im]; // branch length = new height minus the height already reached by that child
+ len[k][1] = (double)minscore - tmplen[jm];
-#if 0
-#endif
+ tmplen[im] = (double)minscore; // the merged cluster keeps index im
+ for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 ); // im absorbs the members of jm
+ for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
+// for( i=0; i<nseq; i++ )
+ for( i=0; i!=-1; i=ac[i].next ) // refresh the distance from the merged cluster to every other active cluster
+ {
+ if( i != im && i != jm )
+ {
+ if( i < im ) // order each index pair so that row < column
+ {
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
+ }
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
+ }
+ eff0 = eff[miniim][maxiim];
+ eff1 = eff[minijm][maxijm];
+ eff[miniim][maxiim] =
+ MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + // blend of the smaller distance and the mean,
+ ( eff0 + eff1 ) * 0.5 * sueff_global; // weighted by sueff_global
+// eff[minijm][maxijm] = 9999.0;
+ }
+ }
+ ac[ac[jm].prev].next = ac[jm].next; // unlink jm from the active chain (jm always has a predecessor, since im precedes it)
+ if( ac[jm].next != -1 ) ac[ac[jm].next].prev = ac[jm].prev; // jm may be the tail (next == -1); never write to ac[-1]
+// eff[im][jm] = 9999.0;
+#if 0
+ reporterr( "STEP-%03d:\n", k+1 );
+ reporterr( "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) reporterr( " %03d", topol[k][0][i] );
+ reporterr( "\n" );
+ reporterr( "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) reporterr( " %03d", topol[k][1][i] );
+ reporterr( "\n" );
+#endif
+ }
+ reporterr( "\n" );
-void extendlocalhom( int nseq, LocalHom **localhom )
+// FreeFloatMtx( eff );
+// FreeCharMtx( pair );
+// FreeFloatVec( tmplen );
+// free( ac );
+}
+void supg( int nseq, double **oeff, int ***topol, double **len ) // merges the closest pair nseq-1 times by scanning the whole upper triangle each step; fills topol (member lists) and len (branch lengths)
{
- int i, j, k, pos0, pos1, pos2, st;
- int start1, start2, end1, end2;
- static int *tmpint1 = NULL;
- static int *tmpint2 = NULL;
- static int *tmpdouble1 = NULL;
- static int *tmpdouble2 = NULL;
- double opt;
- LocalHom *tmpptr;
- if( tmpint1 == NULL )
+ int i, j, k, miniim, maxiim, minijm, maxijm;
+#if 0
+ double eff[nseq][nseq];
+ char pair[njob][njob];
+#else
+ static double *tmplen; // tmplen[c]: height at which cluster c was last merged (0.0 for a single item)
+ int *intpt;
+ double **doubleptpt;
+ double *doublept;
+ double tmpdouble;
+ double eff1, eff0;
+ static double **eff = NULL; // working copy of oeff; only entries with row < column are read
+ static char **pair = NULL; // pair[c][i] > 0 marks item i as a member of cluster c
+ if( !eff ) // allocated on the first call only; nothing in this function frees these buffers
{
- tmpint1 = AllocateIntVec( nlenmax );
- tmpint2 = AllocateIntVec( nlenmax );
- tmpdouble1 = AllocateIntVec( nlenmax );
- tmpdouble2 = AllocateIntVec( nlenmax );
+ eff = AllocateFloatMtx( njob, njob ); // NOTE(review): a "Float" allocator feeds a double ** here - confirm its element type; sized by the global njob, not nseq
+ pair = AllocateCharMtx( njob, njob );
+ tmplen = AllocateFloatVec( njob );
}
+#endif
-
- for( k=0; k<nseq; k++ )
+
+ for( i=0; i<nseq; i++ )
{
- for( i=0; i<nseq-1; i++ )
+ for( j=0; j<nseq; j++ )
{
- if( i == k ) continue;
- for( pos0=0; pos0<nlenmax; pos0++ )
- tmpint1[pos0] = -1;
+ eff[i][j] = (double)oeff[i][j]; // the caller's matrix is left untouched
+ }
+ }
+ for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
+ for( i=0; i<nseq; i++ ) pair[i][i] = 1; // every item starts as its own cluster
- tmpptr=localhom[k]+i;
- do
- {
- pos0 = tmpptr->start1;
- pos1 = tmpptr->start2;
- while( pos0<=tmpptr->end1 )
- {
- tmpint1[pos0] = pos1++;
- tmpdouble1[pos0] = tmpptr->opt;
- pos0++;
- }
- } while( tmpptr = tmpptr->next );
+ for( k=0; k<nseq-1; k++ ) // one merge per step
+ {
+ double minscore = 9999.0; // sentinel; NOTE(review): if no entry is below it, im/jm stay -1 and are then used as indices
+ int im = -1, jm = -1;
+ doubleptpt = eff;
+ for( i=0; i<nseq-1; i++ )
+ {
+ doublept = *doubleptpt++ + i + 1; // start of row i's upper-triangle part (column i+1)
for( j=i+1; j<nseq; j++ )
- {
- if( j == k ) continue;
- for( pos1=0; pos1<nlenmax; pos1++ ) tmpint2[pos1] = -1;
- tmpptr=localhom[k]+j;
- do
+ {
+ tmpdouble = *doublept++;
+ if( tmpdouble < minscore )
{
- pos0 = tmpptr->start1;
- pos2 = tmpptr->start2;
- while( pos0<=tmpptr->end1 )
- {
- tmpint2[pos0] = pos2++;
- tmpdouble2[pos0++] = tmpptr->opt;
- }
- } while( tmpptr = tmpptr->next );
-
-#if 0
+ minscore = tmpdouble;
+ im = i; jm = j; // closest pair so far, im < jm
+ }
+ }
+ }
+ intpt = topol[k][0]; // topol[k][0]: members of cluster im in increasing index order, -1 terminated
+ for( i=0; i<nseq; i++ )
+ if( pair[im][i] > 0 )
+ *intpt++ = i;
+ *intpt = -1;
- fprintf( stderr, "i,j=%d,%d\n", i, j );
+ intpt = topol[k][1]; // topol[k][1]: members of cluster jm
+ for( i=0; i<nseq; i++ )
+ if( pair[jm][i] > 0 )
+ *intpt++ = i;
+ *intpt = -1;
- for( pos0=0; pos0<nlenmax; pos0++ )
- fprintf( stderr, "%d ", tmpint1[pos0] );
- fprintf( stderr, "\n" );
+ len[k][0] = (double)minscore / 2.0 - tmplen[im]; // branch length = half the merged distance minus the height already reached by that child
+ len[k][1] = (double)minscore / 2.0 - tmplen[jm];
- for( pos0=0; pos0<nlenmax; pos0++ )
- fprintf( stderr, "%d ", tmpint2[pos0] );
- fprintf( stderr, "\n" );
-#endif
+ tmplen[im] = (double)minscore / 2.0; // the merged cluster keeps index im
+ for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 ); // im absorbs the members of jm
+ for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
- st = 0;
- for( pos0=0; pos0<nlenmax; pos0++ )
+ for( i=0; i<nseq; i++ ) // visits every index, including clusters merged away earlier
+ {
+ if( i != im && i != jm )
+ {
+#if 1
+ if( i < im ) // order each index pair so that row < column
{
-// fprintf( stderr, "pos0 = %d/%d, st = %d, tmpint1[pos0] = %d, tmpint2[pos0] = %d\n", pos0, nlenmax, st, tmpint1[pos0], tmpint2[pos0] );
- if( tmpint1[pos0] >= 0 && tmpint2[pos0] >= 0 )
- {
- if( st == 0 )
- {
- st = 1;
- start1 = tmpint1[pos0];
- start2 = tmpint2[pos0];
- opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
- }
- else if( tmpint1[pos0-1] != tmpint1[pos0]-1 || tmpint2[pos0-1] != tmpint2[pos0]-1 )
- {
- addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
- addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
- start1 = tmpint1[pos0];
- start2 = tmpint2[pos0];
- opt = MIN( tmpdouble1[pos0], tmpdouble2[pos0] );
- }
- }
- if( tmpint1[pos0] == -1 || tmpint2[pos0] == -1 )
- {
- if( st == 1 )
- {
- st = 0;
- addlocalhom_e( localhom[i]+j, start1, start2, tmpint1[pos0-1], tmpint2[pos0-1], opt );
- addlocalhom_e( localhom[j]+i, start2, start1, tmpint2[pos0-1], tmpint1[pos0-1], opt );
- }
- }
+ miniim = i;
+ maxiim = im;
+ minijm = i;
+ maxijm = jm;
}
- }
- }
- }
-}
+ else if( i < jm )
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = i;
+ maxijm = jm;
+ }
+ else
+ {
+ miniim = im;
+ maxiim = i;
+ minijm = jm;
+ maxijm = i;
+ }
+#else
+ miniim = MIN( i, im );
+ maxiim = MAX( i, im );
+ minijm = MIN( i, jm );
+ maxijm = MAX( i, jm );
#endif
-
-static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
-{
-// dokka machigatteru
- if( pt != lh ) // susumeru
- {
- pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
- pt = pt->next;
- pt->next = NULL;
- lh->last = pt;
- }
- else // sonomamatsukau
- {
- lh->last = pt;
- }
- lh->nokori++;
-// fprintf( stderr, "in addlocalhom2_e, pt = %p, pt->next = %p, interm=%d, sti-eni-stj-enj=%d %d %d %d\n", pt, pt->next, interm, sti, eni, stj, enj );
-
- pt->start1 = sti;
- pt->start2 = stj;
- pt->end1 = eni;
- pt->end2 = enj;
- pt->opt = opt;
- pt->extended = interm;
- pt->overlapaa = overlp;
-#if 0
- fprintf( stderr, "i: %d-%d\n", sti, eni );
- fprintf( stderr, "j: %d-%d\n", stj, enj );
- fprintf( stderr, "opt=%f\n", opt );
- fprintf( stderr, "overlp=%d\n", overlp );
+#if 1
+ eff0 = eff[miniim][maxiim];
+ eff1 = eff[minijm][maxijm];
+ eff[miniim][maxiim] =
+ MIN( eff0, eff1 ) * ( 1.0 - sueff_global ) + // blend of the smaller distance and the mean,
+ ( eff0 + eff1 ) * 0.5 * sueff_global; // weighted by sueff_global
+#else
+ MIN( eff[miniim][maxiim], eff[minijm][maxijm] ) * ( 1.0 - sueff_global ) +
+ ( eff[miniim][maxiim] + eff[minijm][maxijm] ) * 0.5 * sueff_global;
+#endif
+ eff[minijm][maxijm] = 9999.0; // retire jm's entry: the sentinel value can never win the strict < comparison in the scan
+ eff[im][jm] = 9999.0; // retire the merged pair itself (re-assigned on every pass of this loop)
+ }
+ }
+#if DEBUG
+ printf( "STEP-%03d:\n", k+1 );
+ printf( "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
+ printf( "\n" );
+ printf( "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
+ printf( "\n" );
#endif
+ }
}
-void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
+void spg( int nseq, double **oeff, int ***topol, double **len )
{
- int overlp, plim;
- int i, j, k;
- int pi, pj, pk, len;
- int status, sti, stj;
- int *ipt;
- int co;
- static int *ini = NULL;
- static int *inj = NULL;
- LocalHom *pt;
-
- sti = 0; // by D.Mathog, a guess
- stj = 0; // by D.Mathog, a guess
-
- if( ini == NULL )
+ int i, j, k;
+ double tmplen[M];
+#if 0
+ double eff[nseq][nseq];
+ char pair[njob][njob];
+#else
+ double **eff = NULL;
+ char **pair = NULL;
+ if( !eff )
{
- ini = AllocateIntVec( nlenmax+1 );
- inj = AllocateIntVec( nlenmax+1 );
+ eff = AllocateDoubleMtx( njob, njob );
+ pair = AllocateCharMtx( njob, njob );
}
-
-
- for( i=0; i<nseq-1; i++ )
- {
- for( j=i+1; j<nseq; j++ )
- {
-#if 0
- for( k=0; k<nseq; k++ ) sai[k] = 0;
- numint = ncons;
- while( 1 )
- {
- k = (int)( rnd() * nseq );
- if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii
- if( numint-- == 0 ) break;
- if( sai[k] ) continue;
- sai[k] = 1;
-#else
- for( k=0; k<nseq; k++ )
- {
#endif
-// fprintf( stderr, "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
- if( k == i || k == j ) continue; // mou yatta nomo habuita hoga ii
- if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
- ipt = ini; co = nlenmax+1;
- while( co-- ) *ipt++ = -1;
- ipt = inj; co = nlenmax+1;
- while( co-- ) *ipt++ = -1;
- overlp = 0;
+
+ for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) eff[i][j] = oeff[i][j];
+ for( i=0; i<nseq; i++ ) tmplen[i] = 0.0;
+ for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ ) pair[i][j] = 0;
+ for( i=0; i<nseq; i++ ) pair[i][i] = 1;
- {
- for( pt=localhom[i]+k; pt; pt=pt->next )
- {
-// fprintf( stderr, "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
- if( pt->opt == -1 )
- {
- fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
- }
- if( pt->extended > -1 ) break;
- pi = pt->start1;
- pk = pt->start2;
- len = pt->end1 - pt->start1 + 1;
- ipt = ini + pk;
- while( len-- ) *ipt++ = pi++;
- }
- }
+ for( k=0; k<nseq-1; k++ )
+ {
+ double minscore = 9999.0;
+ int im = -1, jm = -1;
+ int count;
- {
- for( pt=localhom[j]+k; pt; pt=pt->next )
- {
- if( pt->opt == -1 )
- {
- fprintf( stderr, "opt kainaide tbfast.c = %f\n", pt->opt );
- }
- if( pt->extended > -1 ) break;
- pj = pt->start1;
- pk = pt->start2;
- len = pt->end1 - pt->start1 + 1;
- ipt = inj + pk;
- while( len-- ) *ipt++ = pj++;
- }
- }
-#if 0
- fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
- overlp = 0;
- for( pk = 0; pk < nlenmax; pk++ )
- {
- if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
- fprintf( stderr, " %d", inj[pk] );
- }
- fprintf( stderr, "\n" );
+ for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
+ {
+ if( eff[i][j] < minscore )
+ {
+ minscore = eff[i][j];
+ im = i; jm = j;
+ }
+ }
+ for( i=0, count=0; i<nseq; i++ )
+ if( pair[im][i] > 0 )
+ {
+ topol[k][0][count] = i;
+ count++;
+ }
+ topol[k][0][count] = -1;
+ for( i=0, count=0; i<nseq; i++ )
+ if( pair[jm][i] > 0 )
+ {
+ topol[k][1][count] = i;
+ count++;
+ }
+ topol[k][1][count] = -1;
- fprintf( stderr, "i=%d,j=%d,k=%d\n", i, j, k );
- overlp = 0;
- for( pk = 0; pk < nlenmax; pk++ )
- {
- if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
- fprintf( stderr, " %d", ini[pk] );
- }
- fprintf( stderr, "\n" );
-#endif
- overlp = 0;
- plim = nlenmax+1;
- for( pk = 0; pk < plim; pk++ )
- if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
+ len[k][0] = minscore / 2.0 - tmplen[im];
+ len[k][1] = minscore / 2.0 - tmplen[jm];
+ tmplen[im] = minscore / 2.0;
- status = 0;
- plim = nlenmax+1;
- for( pk=0; pk<plim; pk++ )
- {
-// fprintf( stderr, "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
- if( status )
- {
- if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // saigonoshori
- {
- status = 0;
-// fprintf( stderr, "end here!\n" );
+ for( i=0; i<nseq; i++ ) pair[im][i] += ( pair[jm][i] > 0 );
+ for( i=0; i<nseq; i++ ) pair[jm][i] = 0;
- pt = localhom[i][j].last;
-// fprintf( stderr, "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
- addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
-// fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
+ for( i=0; i<nseq; i++ )
+ {
+ if( i != im && i != jm )
+ {
+ eff[MIN(i,im)][MAX(i,im)] =
+ MIN( eff[MIN(i,im)][MAX(i,im)], eff[MIN(i,jm)][MAX(i,jm)] );
+ eff[MIN(i,jm)][MAX(i,jm)] = 9999.0;
+ }
+ eff[im][jm] = 9999.0;
+ }
+#if DEBUG
+ printf( "STEP-%03d:\n", k+1 );
+ printf( "len0 = %f\n", len[k][0] );
+ for( i=0; topol[k][0][i]>-1; i++ ) printf( " %03d", topol[k][0][i] );
+ printf( "\n" );
+ printf( "len1 = %f\n", len[k][1] );
+ for( i=0; topol[k][1][i]>-1; i++ ) printf( " %03d", topol[k][1][i] );
+ printf( "\n" );
+#endif
+ }
+}
- pt = localhom[j][i].last;
-// fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
-// fprintf( stderr, "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
- addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
-// fprintf( stderr, "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
- }
- }
- if( !status ) // else deha arimasenn.
- {
- if( ini[pk] == -1 || inj[pk] == -1 ) continue;
- sti = ini[pk];
- stj = inj[pk];
-// fprintf( stderr, "start here!\n" );
- status = 1;
- }
- }
-// if( status ) fprintf( stderr, "end here\n" );
+double ipower( double x, int n ) /* n > 0 */
+{
+ double r;
-// exit( 1 );
-// fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
- }
-#if 0
- for( pt=localhomtable[i]+j; pt; pt=pt->next )
- {
- if( tmpptr->opt == -1.0 ) continue;
- fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
- }
-#endif
- }
- }
+ r = 1;
+ while( n != 0 )
+ {
+ if( n & 1 ) r *= x;
+ x *= x; n >>= 1;
+ }
+ return( r );
}
-int makelocal( char *s1, char *s2, int thr )
+void countnode( int nseq, int ***topol, double **node ) /* node[j][i] != node[i][j] */
{
- int start, maxstart, maxend;
- char *pt1, *pt2;
- double score;
- double maxscore;
+ int i, j, k, s1, s2;
+ static double rootnode[M];
- pt1 = s1;
- pt2 = s2;
+ if( nseq-2 < 0 )
+ {
+ reporterr( "Too few sequence for countnode: nseq = %d\n", nseq );
+ exit( 1 );
+ }
- maxend = 0; // by D.Mathog, a guess
+ for( i=0; i<nseq; i++ ) rootnode[i] = 0;
+ for( i=0; i<nseq-2; i++ )
+ {
+ for( j=0; topol[i][0][j]>-1; j++ )
+ rootnode[topol[i][0][j]]++;
+ for( j=0; topol[i][1][j]>-1; j++ )
+ rootnode[topol[i][1][j]]++;
+ for( j=0; topol[i][0][j]>-1; j++ )
+ {
+ s1 = topol[i][0][j];
+ for( k=0; topol[i][1][k]>-1; k++ )
+ {
+ s2 = topol[i][1][k];
+ node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
+ }
+ }
+ }
+ for( j=0; topol[nseq-2][0][j]>-1; j++ )
+ {
+ s1 = topol[nseq-2][0][j];
+ for( k=0; topol[nseq-2][1][k]>-1; k++ )
+ {
+ s2 = topol[nseq-2][1][k];
+ node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
+ }
+ }
+}
-// fprintf( stderr, "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
- maxscore = 0.0;
- score = 0.0;
- start = 0;
- maxstart = 0;
- while( *pt1 )
+void countnode_int( int nseq, int ***topol, int **node ) /* node[i][j] == node[j][i] */
+{
+ int i, j, k, s1, s2;
+ int rootnode[M];
+
+ for( i=0; i<nseq; i++ ) rootnode[i] = 0;
+ for( i=0; i<nseq-2; i++ )
+ {
+ for( j=0; topol[i][0][j]>-1; j++ )
+ rootnode[topol[i][0][j]]++;
+ for( j=0; topol[i][1][j]>-1; j++ )
+ rootnode[topol[i][1][j]]++;
+ for( j=0; topol[i][0][j]>-1; j++ )
+ {
+ s1 = topol[i][0][j];
+ for( k=0; topol[i][1][k]>-1; k++ )
+ {
+ s2 = topol[i][1][k];
+ node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
+ }
+ }
+ }
+ for( j=0; topol[nseq-2][0][j]>-1; j++ )
+ {
+ s1 = topol[nseq-2][0][j];
+ for( k=0; topol[nseq-2][1][k]>-1; k++ )
+ {
+ s2 = topol[nseq-2][1][k];
+ node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
+ }
+ }
+ for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
+ node[j][i] = node[i][j];
+#if DEBUG
+ reporterr( "node[][] in countnode_int" );
+ for( i=0; i<nseq; i++ )
{
-// fprintf( stderr, "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
- if( *pt1 == '-' || *pt2 == '-' )
+ for( j=0; j<nseq; j++ )
{
-// fprintf( stderr, "penalty = %d\n", penalty );
- score += penalty;
+ reporterr( "%#3d", node[i][j] );
+ }
+ reporterr( "\n" );
+ }
+#endif
+}
+
+void counteff_simple_double( int nseq, int ***topol, double **len, double *node )
+{
+ int i, j, s1, s2;
+ double total;
+ static double rootnode[M];
+ static double eff[M];
+
+#if DEBUG
+ for( i=0; i<nseq; i++ ){
+ reporterr( "len0 = %f\n", len[i][0] );
+ reporterr( "len1 = %f\n", len[i][1] );
+ }
+#endif
+ for( i=0; i<nseq; i++ )
+ {
+ rootnode[i] = 0.0;
+ eff[i] = 1.0;
+/*
+ rootnode[i] = 1.0;
+*/
+ }
+ for( i=0; i<nseq-1; i++ )
+ {
+ for( j=0; (s1=topol[i][0][j]) > -1; j++ )
+ {
+ rootnode[s1] += (double)len[i][0] * eff[s1];
+ eff[s1] *= 0.5;
+/*
+ rootnode[s1] *= 0.5;
+*/
+
+ }
+ for( j=0; (s2=topol[i][1][j]) > -1; j++ )
+ {
+ rootnode[s2] += (double)len[i][1] * eff[s2];
+ eff[s2] *= 0.5;
+/*
+ rootnode[s2] *= 0.5;
+*/
+
+ }
+ }
+ for( i=0; i<nseq; i++ )
+ {
+#if 1 /* 97.9.29 */
+ rootnode[i] += GETA3;
+#endif
+#if 0
+ reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
+#endif
+ }
+#if 1
+ total = 0.0;
+ for( i=0; i<nseq; i++ )
+ {
+ total += rootnode[i];
+ }
+#else
+ total = 1.0;
+#endif
+
+ for( i=0; i<nseq; i++ )
+ {
+ node[i] = rootnode[i] / total;
+ }
+
+#if 0
+ reporterr( "weight array in counteff_simple\n" );
+ for( i=0; i<nseq; i++ )
+ reporterr( "%f\n", node[i] );
+ printf( "\n" );
+ exit( 1 );
+#endif
+}
+
+void counteff_simple_double_nostatic_memsave( int nseq, int ***topol, double **len, Treedep *dep, double *node )
+{
+ int i, j, s1, s2;
+ double total;
+ double *rootnode;
+ double *eff;
+ int **localmem;
+ int posinmem;
+
+ rootnode = AllocateDoubleVec( nseq );
+ eff = AllocateDoubleVec( nseq );
+ localmem = AllocateIntMtx( 2, nseq+1 );
+
+ for( i=0; i<nseq; i++ ) // 2014/06/07, fu no eff wo sakeru.
+ {
+ if( len[i][0] < 0.0 )
+ {
+ reporterr( "WARNING: negative branch length %f, step %d-0\n", len[i][0], i );
+ len[i][0] = 0.0;
+ }
+ if( len[i][1] < 0.0 )
+ {
+ reporterr( "WARNING: negative branch length %f, step %d-1\n", len[i][1], i );
+ len[i][1] = 0.0;
+ }
+ }
+#if DEBUG
+ for( i=0; i<nseq-1; i++ )
+ {
+ reporterr( "\nstep %d, group 0\n", i );
+ for( j=0; topol[i][0][j]!=-1; j++) reporterr( "%3d ", topol[i][0][j] );
+ reporterr( "\n", i );
+ reporterr( "step %d, group 1\n", i );
+ for( j=0; topol[i][1][j]!=-1; j++) reporterr( "%3d ", topol[i][1][j] );
+ reporterr( "\n", i );
+ reporterr( "len0 = %f\n", len[i][0] );
+ reporterr( "len1 = %f\n", len[i][1] );
+ }
+#endif
+ for( i=0; i<nseq; i++ )
+ {
+ rootnode[i] = 0.0;
+ eff[i] = 1.0;
+/*
+ rootnode[i] = 1.0;
+*/
+ }
+ for( i=0; i<nseq-1; i++ )
+ {
+ localmem[0][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[0], &posinmem, topol, dep, i, 0 );
+ localmem[1][0] = -1;
+ posinmem = 0;
+ topolorder( njob, localmem[1], &posinmem, topol, dep, i, 1 );
+
+ for( j=0; (s1=localmem[0][j]) > -1; j++ )
+ {
+ rootnode[s1] += (double)len[i][0] * eff[s1];
+ eff[s1] *= 0.5;
+/*
+ rootnode[s1] *= 0.5;
+*/
+
+ }
+ for( j=0; (s2=localmem[1][j]) > -1; j++ )
+ {
+ rootnode[s2] += (double)len[i][1] * eff[s2];
+ eff[s2] *= 0.5;
+/*
+ rootnode[s2] *= 0.5;
+*/
+
+ }
+ }
+ for( i=0; i<nseq; i++ )
+ {
+#if 1 /* 97.9.29 */
+ rootnode[i] += GETA3;
+#endif
+#if 0
+ reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
+#endif
+ }
+#if 1
+ total = 0.0;
+ for( i=0; i<nseq; i++ )
+ {
+ total += rootnode[i];
+ }
+#else
+ total = 1.0;
+#endif
+
+ for( i=0; i<nseq; i++ )
+ {
+ node[i] = rootnode[i] / total;
+ }
+
+#if 0
+ reporterr( "weight array in counteff_simple\n" );
+ for( i=0; i<nseq; i++ )
+ reporterr( "%f\n", node[i] );
+ printf( "\n" );
+ exit( 1 );
+#endif
+ free( rootnode );
+ free( eff );
+ FreeIntMtx( localmem );
+}
+
+void counteff_simple_double_nostatic( int nseq, int ***topol, double **len, double *node )
+{
+ int i, j, s1, s2;
+ double total;
+ double *rootnode;
+ double *eff;
+
+ rootnode = AllocateDoubleVec( nseq );
+ eff = AllocateDoubleVec( nseq );
+
+ for( i=0; i<nseq; i++ ) // 2014/06/07, fu no eff wo sakeru.
+ {
+ if( len[i][0] < 0.0 )
+ {
+ reporterr( "WARNING: negative branch length %f, step %d-0\n", len[i][0], i );
+ len[i][0] = 0.0;
+ }
+ if( len[i][1] < 0.0 )
+ {
+ reporterr( "WARNING: negative branch length %f, step %d-1\n", len[i][1], i );
+ len[i][1] = 0.0;
+ }
+ }
+#if DEBUG
+ for( i=0; i<nseq-1; i++ )
+ {
+ reporterr( "\nstep %d, group 0\n", i );
+ for( j=0; topol[i][0][j]!=-1; j++) reporterr( "%3d ", topol[i][0][j] );
+ reporterr( "\n", i );
+ reporterr( "step %d, group 1\n", i );
+ for( j=0; topol[i][1][j]!=-1; j++) reporterr( "%3d ", topol[i][1][j] );
+ reporterr( "\n", i );
+ reporterr( "len0 = %f\n", len[i][0] );
+ reporterr( "len1 = %f\n", len[i][1] );
+ }
+#endif
+ for( i=0; i<nseq; i++ )
+ {
+ rootnode[i] = 0.0;
+ eff[i] = 1.0;
+/*
+ rootnode[i] = 1.0;
+*/
+ }
+ for( i=0; i<nseq-1; i++ )
+ {
+ for( j=0; (s1=topol[i][0][j]) > -1; j++ )
+ {
+ rootnode[s1] += (double)len[i][0] * eff[s1];
+ eff[s1] *= 0.5;
+/*
+ rootnode[s1] *= 0.5;
+*/
+
+ }
+ for( j=0; (s2=topol[i][1][j]) > -1; j++ )
+ {
+ rootnode[s2] += (double)len[i][1] * eff[s2];
+ eff[s2] *= 0.5;
+/*
+ rootnode[s2] *= 0.5;
+*/
+
+ }
+ }
+ for( i=0; i<nseq; i++ )
+ {
+#if 1 /* 97.9.29 */
+ rootnode[i] += GETA3;
+#endif
+#if 0
+ reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
+#endif
+ }
+#if 1
+ total = 0.0;
+ for( i=0; i<nseq; i++ )
+ {
+ total += rootnode[i];
+ }
+#else
+ total = 1.0;
+#endif
+
+ for( i=0; i<nseq; i++ )
+ {
+ node[i] = rootnode[i] / total;
+ }
+
+#if 0
+ reporterr( "weight array in counteff_simple\n" );
+ for( i=0; i<nseq; i++ )
+ reporterr( "%f\n", node[i] );
+ printf( "\n" );
+ exit( 1 );
+#endif
+ free( rootnode );
+ free( eff );
+}
+
+void counteff_simple( int nseq, int ***topol, double **len, double *node )
+{
+ int i, j, s1, s2;
+ double total;
+#if 0
+ static double rootnode[M];
+ static double eff[M];
+#else
+ double *rootnode;
+ double *eff;
+ rootnode = AllocateDoubleVec( nseq );
+ eff = AllocateDoubleVec( nseq );
+#endif
+
+#if DEBUG
+ for( i=0; i<nseq; i++ ){
+ reporterr( "len0 = %f\n", len[i][0] );
+ reporterr( "len1 = %f\n", len[i][1] );
+ }
+#endif
+ for( i=0; i<nseq; i++ )
+ {
+ rootnode[i] = 0.0;
+ eff[i] = 1.0;
+/*
+ rootnode[i] = 1.0;
+*/
+ }
+ for( i=0; i<nseq-1; i++ )
+ {
+ for( j=0; (s1=topol[i][0][j]) > -1; j++ )
+ {
+ rootnode[s1] += len[i][0] * eff[s1];
+ eff[s1] *= 0.5;
+/*
+ rootnode[s1] *= 0.5;
+*/
+
+ }
+ for( j=0; (s2=topol[i][1][j]) > -1; j++ )
+ {
+ rootnode[s2] += len[i][1] * eff[s2];
+ eff[s2] *= 0.5;
+/*
+ rootnode[s2] *= 0.5;
+*/
+
+ }
+ }
+ for( i=0; i<nseq; i++ )
+ {
+#if 1 /* 97.9.29 */
+ rootnode[i] += GETA3;
+#endif
+#if 0
+ reporterr( "### rootnode for %d = %f\n", i, rootnode[i] );
+#endif
+ }
+#if 1
+ total = 0.0;
+ for( i=0; i<nseq; i++ )
+ {
+ total += rootnode[i];
+ }
+#else
+ total = 1.0;
+#endif
+
+ for( i=0; i<nseq; i++ )
+ {
+ node[i] = rootnode[i] / total;
+ }
+
+#if 0
+ reporterr( "weight array in counteff_simple\n" );
+ for( i=0; i<nseq; i++ )
+ reporterr( "%f\n", node[i] );
+ printf( "\n" );
+ exit( 1 );
+#endif
+#if 1
+ free( rootnode );
+ free( eff );
+#endif
+}
+
+
+void counteff( int nseq, int ***topol, double **len, double **node )
+{
+ int i, j, k, s1, s2;
+ double rootnode[M];
+ double eff[M];
+
+ if( mix )
+ {
+ switch( weight )
+ {
+ case( 2 ):
+ weight = 3;
+ break;
+ case( 3 ):
+ weight = 2;
+ break;
+ default:
+ ErrorExit( "mix error" );
+ break;
+ }
+ }
+
+ if( weight == 2 )
+ {
+ for( i=0; i<nseq; i++ ) rootnode[i] = 0;
+ for( i=0; i<nseq-2; i++ )
+ {
+ for( j=0; topol[i][0][j]>-1; j++ )
+ rootnode[topol[i][0][j]]++;
+ for( j=0; topol[i][1][j]>-1; j++ )
+ rootnode[topol[i][1][j]]++;
+ for( j=0; topol[i][0][j]>-1; j++ )
+ {
+ s1 = topol[i][0][j];
+ for( k=0; topol[i][1][k]>-1; k++ )
+ {
+ s2 = topol[i][1][k];
+ node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2] - 1;
+ }
+ }
+ }
+ for( j=0; topol[nseq-2][0][j]>-1; j++ )
+ {
+ s1 = topol[nseq-2][0][j];
+ for( k=0; topol[nseq-2][1][k]>-1; k++ )
+ {
+ s2 = topol[nseq-2][1][k];
+ node[MIN(s1,s2)][MAX(s1,s2)] = rootnode[s1] + rootnode[s2];
+ }
+ }
+ for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
+ node[i][j] = ipower( 0.5, (int)node[i][j] ) + geta2;
+ for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
+ node[j][i] = node[i][j];
+ }
+
+ if( weight == 3 )
+ {
+#if DEBUG
+ for( i=0; i<nseq; i++ ){
+ reporterr( "len0 = %f\n", len[i][0] );
+ reporterr( "len1 = %f\n", len[i][1] );
+ }
+#endif
+ for( i=0; i<nseq; i++ )
+ {
+ rootnode[i] = 0.0;
+ eff[i] = 1.0;
+/*
+ rootnode[i] = 1.0;
+*/
+ }
+ for( i=0; i<nseq-1; i++ )
+ {
+ for( j=0; (s1=topol[i][0][j]) > -1; j++ )
+ {
+ rootnode[s1] += len[i][0] * eff[s1];
+ eff[s1] *= 0.5;
+/*
+ rootnode[s1] *= 0.5;
+*/
+
+ }
+ for( j=0; (s2=topol[i][1][j]) > -1; j++ )
+ {
+ rootnode[s2] += len[i][1] * eff[s2];
+ eff[s2] *= 0.5;
+/*
+ rootnode[s2] *= 0.5;
+*/
+
+ }
+ }
+ for( i=0; i<nseq; i++ )
+ {
+#if 1 /* 97.9.29 */
+ rootnode[i] += GETA3;
+#endif
+#if DEBUG
+ reporterr( "rootnode for %d = %f\n", i, rootnode[i] );
+#endif
+ }
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<nseq; j++ )
+ if( j != i )
+ node[i][j] = (double)rootnode[i] * rootnode[j];
+ else node[i][i] = rootnode[i];
+ }
+ }
+
+#if 0
+ printf( "weight matrix in counteff\n" );
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<nseq; j++ )
+ {
+ printf( "%f ", node[i][j] );
+ }
+ printf( "\n" );
+ }
+#endif
+}
+
+double score_calcp( char *seq1, char *seq2, int len ) /* pairwise score of two aligned rows over len columns: amino_dis terms plus penalty once per gap run */
+{
+ int k;
+ unsigned char ms1, ms2;
+ double tmpscore;
+ int len2 = len - 2; /* a gap run whose last column lies beyond len2 ends the scan (see the breaks below) */
+
+ tmpscore = 0.0;
+ for( k=0; k<len; k++ )
+ {
+ ms1 = (unsigned char)seq1[k];
+ ms2 = (unsigned char)seq2[k];
+ if( ms1 == '-' && ms2 == '-' ) continue; /* column gapped in both rows contributes nothing */
+ tmpscore += (double)amino_dis[ms1][ms2];
+
+ if( ms1 == (int)'-' ) /* a gap run starts in seq1 */
+ {
+ tmpscore += (double)penalty;
+ tmpscore += (double)amino_dis[ms1][ms2]; /* NOTE(review): this term was already added above for the same column - confirm the double count is intended */
+ while( (ms1=(unsigned char)seq1[++k]) == '-' ) /* walk the rest of the run; ms2 keeps its value from the run's first column */
+ tmpscore += (double)amino_dis[ms1][ms2];
+ k--; /* step back so the for-loop's k++ lands on the first column after the run */
+ if( k >len2 ) break;
+ continue;
+ }
+ if( ms2 == (int)'-' ) /* a gap run starts in seq2; mirror image of the branch above */
+ {
+ tmpscore += (double)penalty;
+ tmpscore += (double)amino_dis[ms1][ms2]; /* NOTE(review): second addition of the same term, as in the seq1 branch */
+ while( (ms2=(unsigned char)seq2[++k]) == '-' ) /* ms1 keeps its value from the run's first column */
+ tmpscore += (double)amino_dis[ms1][ms2];
+ k--; /* undo the overshoot of the scan */
+ if( k > len2 ) break;
+ continue;
+ }
+ }
+ return( tmpscore );
+}
+
+double score_calc1( char *seq1, char *seq2 ) /* mean amino_dis score over the columns where neither sequence has a gap */
+{
+	int pos;
+	int ncol = strlen( seq1 ); /* seq2 is indexed over the same ncol columns */
+	int npaired = 0;
+	double total = 0.0;
+
+	for( pos=0; pos<ncol; pos++ )
+	{
+		if( seq1[pos] == '-' || seq2[pos] == '-' ) continue; /* a gap on either side: column is not scored */
+		total += (double)amino_dis[(unsigned char)seq1[pos]][(unsigned char)seq2[pos]];
+		npaired++;
+	}
+
+	/* Without a single scorable column the result is fixed at 1.0;
+	   otherwise it is the plain average of the collected terms. */
+	if( npaired == 0 ) return( 1.0 );
+	return( total / (double)npaired );
+}
+
+double substitution_nid( char *seq1, char *seq2 ) /* number of columns (over the length of seq1) that are gap-free and hold identical characters */
+{
+	int pos;
+	int ncol = strlen( seq1 );
+	int nidentical = 0; /* tallied as an integer, returned as a double */
+
+	for( pos=0; pos<ncol; pos++ )
+	{
+		if( seq1[pos] == '-' || seq2[pos] == '-' ) continue; /* gapped columns never count */
+		if( seq1[pos] == seq2[pos] ) nidentical++;
+	}
+
+	return( (double)nidentical );
+}
+
+double substitution_score( char *seq1, char *seq2 ) /* sum of amino_dis over the columns (length of seq1) where neither sequence has a gap */
+{
+	int pos;
+	int ncol = strlen( seq1 );
+	double total = 0.0;
+
+	for( pos=0; pos<ncol; pos++ )
+	{
+		if( seq1[pos] == '-' || seq2[pos] == '-' ) continue; /* gapped columns add nothing */
+		total += amino_dis[(unsigned char)seq1[pos]][(unsigned char)seq2[pos]];
+	}
+
+	return( total );
+}
+
+double substitution_hosei( char *seq1, char *seq2 ) /* corrected distance: -log(1-p) of the mismatch fraction p over gap-free columns, fixed at 3.0 once p >= 0.95 */
+#if 0
+{
+ int k;
+ double score = 0.0;
+ int count = 0;
+ int len = strlen( seq1 );
+
+ for( k=0; k<len; k++ )
+ {
+ if( seq1[k] != '-' && seq2[k] != '-' )
+ {
+ score += (double)( seq1[k] != seq2[k] );
+ count++;
+ }
+ }
+ if( count ) score /= (double)count;
+ else score = 1.0;
+ if( score < 0.95 ) score = - log( 1.0 - score );
+ else score = 3.0;
+ return( score );
+}
+#else
+{
+ int count = 0; /* columns in which neither sequence has a gap */
+ double score;
+ int iscore = 0; /* mismatches among those columns */
+ char s1, s2;
+
+ while( (s1=*seq1++) ) /* runs to the terminator of seq1; seq2 is advanced in step with it */
+ {
+ s2 = *seq2++;
+ if( s1 == '-' ) continue;
+ if( s2 == '-' ) continue;
+ iscore += ( s1 != s2 );
+ count++;
+ }
+ if( count ) score = (double)iscore / count;
+ else score = 1.0; /* nothing comparable: handled like a fully mismatching pair */
+ if( score < 0.95 ) score = - log( 1.0 - score );
+ else score = 3.0; /* fixed value returned instead of the logarithm for fractions of 0.95 and above */
+ return( score );
+}
+#endif
+
+double substitution( char *seq1, char *seq2 ) /* fraction of gap-free columns (over the length of seq1) whose characters differ */
+{
+	int pos;
+	int ncol = strlen( seq1 );
+	int ncompared = 0;
+	int ndiffer = 0;
+
+	for( pos=0; pos<ncol; pos++ )
+	{
+		if( seq1[pos] == '-' || seq2[pos] == '-' ) continue; /* a gap on either side: column is skipped */
+		ncompared++;
+		if( seq1[pos] != seq2[pos] ) ndiffer++;
+	}
+
+	/* With no comparable column the result is fixed at 1.0. */
+	if( ncompared == 0 ) return( 1.0 );
+
+	return( (double)ndiffer / (double)ncompared );
+}
+
+
+void treeconstruction( char **seq, int nseq, int ***topol, double **len, double **eff )
+{
+ int i, j;
+
+ if( weight > 1 )
+ {
+ if( utree == 0 )
+ {
+ for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
+ {
+/*
+ eff[i][j] = (double)score_calc1( seq[i], seq[j] );
+*/
+ eff[i][j] = (double)substitution_hosei( seq[i], seq[j] );
+ /*
+ reporterr( "%f\n", eff[i][j] );
+ */
+ }
+/*
+ reporterr( "distance matrix\n" );
+ for( i=0; i<nseq; i++ )
+ {
+ for( j=0; j<nseq; j++ )
+ {
+ reporterr( "%f ", eff[i][j] );
+ }
+ reporterr( "\n" );
+ }
+*/
+/*
+ upg( nseq, eff, topol, len );
+ upg2( nseq, eff, topol, len );
+*/
+ spg( nseq, eff, topol, len );
+ counteff( nseq, topol, len, eff );
+ }
+ }
+ else
+ {
+ for( i=0; i<nseq; i++ ) for( j=0; j<nseq; j++ )
+ eff[i][j] = 1.0;
+ }
+/*
+reporterr( "weight matrix\n" );
+for( i=0; i<nseq; i++ )
+{
+ for( j=0; j<nseq; j++ )
+ {
+ reporterr( "%f ", eff[i][j] );
+ }
+ reporterr( "\n" );
+}
+*/
+}
+
+double bscore_calc( char **seq, int s, double **eff ) /* algorithm B */
+{
+ int i, j, k;
+ int gb1, gb2, gc1, gc2;
+ int cob;
+ int nglen;
+ int len = strlen( seq[0] );
+ long score;
+
+ score = 0;
+ nglen = 0;
+ for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
+ {
+ double efficient = eff[i][j];
+
+ gc1 = 0;
+ gc2 = 0;
+ for( k=0; k<len; k++ )
+ {
+ gb1 = gc1;
+ gb2 = gc2;
+
+ gc1 = ( seq[i][k] == '-' );
+ gc2 = ( seq[j][k] == '-' );
+
+ cob =
+ !gb1 * gc1
+ * !gb2 * !gc2
+
+ + !gb1 * !gc1
+ * !gb2 * gc2
+
+ + !gb1 * gc1
+ * gb2 * !gc2
+
+ + gb1 * !gc1
+ * !gb2 * gc2
+
+ + gb1 * !gc1
+ * gb2 * gc2 *BEFF
+
+ + gb1 * gc1
+ * gb2 * !gc2 *BEFF
+ ;
+ score += (long)cob * penalty * efficient;
+ score += (long)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]] * efficient;
+ nglen += ( !gc1 * !gc2 );
+ }
+ }
+ return( (double)score / nglen + 400.0 * !scoremtx );
+}
+
+void AllocateTmpSeqs( char ***mseq2pt, char **mseq1pt, int locnlenmax ) /* allocate working sequence buffers and hand them back through the two out-pointers */
+{
+ *mseq2pt = AllocateCharMtx( njob, locnlenmax+1 ); /* njob rows of locnlenmax+1 chars */
+ *mseq1pt = AllocateCharVec( locnlenmax+1 ); /* one row of locnlenmax+1 chars */
+}
+
+void FreeTmpSeqs( char **mseq2, char *mseq1 ) /* release a char matrix and a char vector (presumably the pair produced by AllocateTmpSeqs - verify against callers) */
+{
+ FreeCharMtx( mseq2 );
+ free( (char *)mseq1 );
+}
+
+
+void gappick0( char *aseq, char *seq ) /* copy seq into aseq with every '-' left out; aseq is NUL-terminated */
+{
+	char c;
+
+	while( (c = *seq++) != 0 )
+		if( c != '-' ) *aseq++ = c;
+
+	/* close the degapped copy */
+	*aseq = 0;
+}
+
+int isallgap( char *seq ) /* 1 when seq holds nothing but '-' (an empty string also gives 1), otherwise 0 */
+{
+	const char *cursor = seq;
+
+	while( *cursor == '-' ) cursor++; /* skip the leading run of gaps */
+
+	/* all-gap exactly when that run reached the terminator */
+	return( *cursor == 0 );
+}
+
+void gappick( int nseq, int s, char **aseq, char **mseq2, /* copy every row of aseq except row s into mseq2, leaving out columns that are '-' in all of the copied rows */
+ double **eff, double *effarr ) /* effarr receives eff[s][i] for each i != s, in increasing i */
+{
+ int i, j, count, countjob, len, allgap;
+ len = strlen( aseq[0] );
+ for( i=0, count=0; i<len; i++ )
+ {
+ allgap = 1;
+ for( j=0; j<nseq; j++ ) if( j != s ) allgap *= ( aseq[j][i] == '-' ); /* stays 1 only if every row other than s has a gap in column i */
+ if( allgap == 0 )
+ {
+ for( j=0, countjob=0; j<nseq; j++ )
+ {
+ if( j != s )
+ {
+ mseq2[countjob][count] = aseq[j][i];
+ countjob++;
+ }
+ }
+ count++;
+ }
+ }
+ for( i=0; i<nseq-1; i++ ) mseq2[i][count] = 0; /* terminate the nseq-1 output rows */
+
+ for( i=0, countjob=0; i<nseq; i++ )
+ {
+ if( i != s )
+ {
+ effarr[countjob] = eff[s][i];
+ countjob++;
+ }
+ }
+/*
+fprintf( stdout, "effarr in gappick s = %d\n", s+1 );
+for( i=0; i<countjob; i++ )
+ fprintf( stdout, " %f", effarr[i] );
+printf( "\n" );
+*/
+}
+
+void commongappick_record( int nseq, char **seq, int *map ) /* in place, delete the columns that are '-' in every sequence; map[new column] records the original column index */
+{
+ int i, j, count;
+ int len = strlen( seq[0] );
+
+
+ for( i=0, count=0; i<=len; i++ ) /* i<=len: column len holds seq[0]'s terminator, which is not '-', so that column is copied (and recorded) as well */
+ {
+ /*
+ allgap = 1;
+ for( j=0; j<nseq; j++ )
+ allgap *= ( seq[j][i] == '-' );
+ if( !allgap )
+ */
+ for( j=0; j<nseq; j++ )
+ if( seq[j][i] != '-' ) break;
+ if( j != nseq ) /* the scan stopped early: at least one sequence has a non-gap here */
+ {
+ for( j=0; j<nseq; j++ )
+ {
+ seq[j][count] = seq[j][i]; /* count <= i, so this only writes to columns already examined */
+ }
+ map[count] = i;
+ count++;
+ }
+ }
+}
+
+
+void commongappick( int nseq, char **seq )
+{
+ int i, j, count;
+ int len = strlen( seq[0] );
+#if 1
+
+ int *mapfromnewtoold;
+
+ mapfromnewtoold = calloc( len+1, sizeof( int ) );
+
+ for( i=0, count=0; i<=len; i++ )
+ {
+ for( j=0; j<nseq; j++ )
+ if( seq[j][i] != '-' ) break;
+ if( j != nseq )
+ {
+ mapfromnewtoold[count++] = i;
+ }
+ }
+// mapfromnewtoold[count] = -1; // iranai
+ for( j=0; j<nseq; j++ )
+ {
+ for( i=0; i<count; i++ )
+ {
+ seq[j][i] = seq[j][mapfromnewtoold[i]];
+ }
+ }
+ free( mapfromnewtoold );
+#else
+--------------------------
+
+ int *mapfromoldtonew;
+ int pos;
+
+ mapfromoldtonew = calloc( len+1, sizeof( int ) );
+ for( i=0; i<=len; i++ ) mapfromoldtonew[i] = -1;
+
+ for( i=0, count=0; i<=len; i++ )
+ {
+ for( j=0; j<nseq; j++ )
+ if( seq[j][i] != '-' ) break;
+ if( j != nseq )
+ {
+ mapfromoldtonew[i] = count;
+ count++;
+ }
+ }
+ for( j=0; j<nseq; j++ )
+ {
+ for( i=0; i<=len; i++ )
+ {
+ if( (pos=mapfromoldtonew[i]) != -1 )
+ seq[j][pos] = seq[j][i];
+ }
+ }
+ free( mapfromoldtonew );
+--------------------------
+
+ for( i=0, count=0; i<=len; i++ )
+ {
+ /*
+ allgap = 1;
+ for( j=0; j<nseq; j++ )
+ allgap *= ( seq[j][i] == '-' );
+ if( !allgap )
+ */
+ for( j=0; j<nseq; j++ )
+ if( seq[j][i] != '-' ) break;
+ if( j != nseq )
+ {
+ for( j=0; j<nseq; j++ )
+ {
+ seq[j][count] = seq[j][i];
+ }
+ count++;
+ }
+ }
+
+#endif
+}
+
+#if 0
+void commongaprecord( int nseq, char **seq, char *originallygapped )
+{
+ int i, j;
+ int len = strlen( seq[0] );
+
+ for( i=0; i<len; i++ )
+ {
+ for( j=0; j<nseq; j++ )
+ if( seq[j][i] != '-' ) break;
+ if( j == nseq )
+ originallygapped[i] = '-';
+ else
+ originallygapped[i] = 'o';
+ }
+ originallygapped[len] = 0;
+}
+#endif
+
+double score_calc0( char **seq, int s, double **eff, int ex ) /* dispatch on the global scmtd: 4 selects score_calc4, anything else score_calc5 */
+{
+	double tmp;
+
+	if( scmtd == 4 ) tmp = score_calc4( seq, s, eff, ex );
+	else if( scmtd == 5 ) tmp = score_calc5( seq, s, eff, ex ); /* 'else' is required: without it the scmtd == 4 result was always overwritten below */
+	else tmp = score_calc5( seq, s, eff, ex ); /* any other scmtd falls back to method 5 */
+
+	return( tmp );
+
+}
+
+/*
+double score_m_1( char **seq, int ex, double **eff )
+{
+ int i, j, k;
+ int len = strlen( seq[0] );
+ int gb1, gb2, gc1, gc2;
+ int cob;
+ int nglen;
+ double score;
+
+ score = 0.0;
+ nglen = 0;
+ for( i=0; i<njob; i++ )
+ {
+ double efficient = eff[MIN(i,ex)][MAX(i,ex)];
+ if( i == ex ) continue;
+
+ gc1 = 0;
+ gc2 = 0;
+ for( k=0; k<len; k++ )
+ {
+ gb1 = gc1;
+ gb2 = gc2;
+
+ gc1 = ( seq[i][k] == '-' );
+ gc2 = ( seq[ex][k] == '-' );
+
+ cob =
+ !gb1 * gc1
+ * !gb2 * !gc2
+
+ + !gb1 * !gc1
+ * !gb2 * gc2
+
+ + !gb1 * gc1
+ * gb2 * !gc2
+
+ + gb1 * !gc1
+ * !gb2 * gc2
+
+ + gb1 * !gc1
+ * gb2 * gc2 *BEFF
+
+ + gb1 * gc1
+ * gb2 * !gc2 *BEFF
+ ;
+ score += (double)cob * penalty * efficient;
+ score += (double)amino_dis[seq[i][k]][seq[ex][k]] * efficient;
+ *
+ nglen += ( !gc1 * !gc2 );
+ *
+ if( !gc1 && !gc2 ) fprintf( stdout, "%f\n", score );
+ }
+ }
+ return( (double)score / nglen + 400.0 * !scoremtx );
+}
+*/
+
+#if 0
+void sitescore( char **seq, double **eff, char sco1[], char sco2[], char sco3[] )
+{
+ int i, j, k;
+ int len = strlen( seq[0] );
+ double tmp;
+ double count;
+ int ch;
+ double sco[N];
+
+ for( i=0; i<len; i++ )
+ {
+ tmp = 0.0; count = 0;
+ for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
+ {
+ /*
+ if( seq[j][i] != '-' && seq[k][i] != '-' )
+ */
+ {
+ tmp += amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx;
+ count++;
+ }
+ }
+ if( count > 0.0 ) tmp /= count;
+ else( tmp = 0.0 );
+ ch = (int)( tmp/100.0 - 0.000001 );
+ sprintf( sco1+i, "%c", ch+0x61 );
+ }
+ sco1[len] = 0;
+
+ for( i=0; i<len; i++ )
+ {
+ tmp = 0.0; count = 0;
+ for( j=0; j<njob-1; j++ ) for( k=j+1; k<njob; k++ )
+ {
+ /*
+ if( seq[j][i] != '-' && seq[k][i] != '-' )
+ */
+ {
+ tmp += eff[j][k] * ( amino_dis[seq[j][i]][seq[k][i]] + 400 * !scoremtx );
+ count += eff[j][k];
+ }
+ }
+ if( count > 0.0 ) tmp /= count;
+ else( tmp = 0.0 );
+ tmp = ( tmp - 400 * !scoremtx ) * 2;
+ if( tmp < 0 ) tmp = 0;
+ ch = (int)( tmp/100.0 - 0.000001 );
+ sprintf( sco2+i, "%c", ch+0x61 );
+ sco[i] = tmp;
+ }
+ sco2[len] = 0;
+
+ for( i=WIN; i<len-WIN; i++ )
+ {
+ tmp = 0.0;
+ for( j=i-WIN; j<=i+WIN; j++ )
+ {
+ tmp += sco[j];
+ }
+ for( j=0; j<njob; j++ )
+ {
+ if( seq[j][i] == '-' )
+ {
+ tmp = 0.0;
+ break;
+ }
+ }
+ tmp /= WIN * 2 + 1;
+ ch = (int)( tmp/100.0 - 0.0000001 );
+ sprintf( sco3+i, "%c", ch+0x61 );
+ }
+ for( i=0; i<WIN; i++ ) sco3[i] = '-';
+ for( i=len-WIN; i<len; i++ ) sco3[i] = '-';
+ sco3[len] = 0;
+}
+#endif
+
+void strins( char *str1, char *str2 ) /* insert str1 in front of str2, in place; writes up to str2[strlen(str1)+strlen(str2)] */
+{
+	int inslen = strlen( str1 );
+	int oldlen = strlen( str2 );
+	int pos;
+
+	/* Open a gap of inslen characters at the front of str2, moving the old
+	   contents (terminator included) from the right end towards the left. */
+	for( pos=oldlen; pos>=0; pos-- ) str2[pos+inslen] = str2[pos];
+
+	/* Drop str1 into the gap, last character first. */
+	for( pos=inslen-1; pos>=0; pos-- ) str2[pos] = str1[pos];
+}
+
+int isaligned( int nseq, char **seq ) /* 1 if every sequence has the same length as seq[0], otherwise 0 */
+{
+	int reflen = strlen( seq[0] ); /* seq[0] is measured even when nseq < 2 */
+	int idx = 1;
+
+	while( idx < nseq )
+		if( strlen( seq[idx++] ) != reflen ) return( 0 ); /* first length mismatch settles it */
+
+	return( 1 );
+}
+
+/*
+ * score_calc_for_score: average pairwise score of the nseq rows of seq.
+ *
+ * For every pair i<j the columns are walked left to right:
+ *   - columns where both rows hold '-' are ignored;
+ *   - every other visited column adds amino_dis[a][b] and is counted in c;
+ *   - when exactly one row has '-' at the visited column, the term
+ *     (penalty - n_dis[0][24]) is added once and the remaining columns of
+ *     that gap run are skipped (neither scored nor counted).
+ * The pair total is divided by c; the sum over pairs is reported through
+ * reporterr(), divided by the number of pairs, and 400.0 is added.
+ *
+ * NOTE(review): c == 0 (no counted column for a pair) and nseq < 2 both
+ * lead to a division by zero here -- confirm callers never pass such input.
+ */
+double score_calc_for_score( int nseq, char **seq )
+{
+ int i, j, k, c;
+ int len = strlen( seq[0] );
+ double score;
+ double tmpscore;
+ char *mseq1, *mseq2;
+
+ score = 0.0;
+ for( i=0; i<nseq-1; i++ )
+ {
+ for( j=i+1; j<nseq; j++ )
+ {
+ mseq1 = seq[i];
+ mseq2 = seq[j];
+ tmpscore = 0.0;
+ c = 0;
+ for( k=0; k<len; k++ )
+ {
+ if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
+ c++;
+ if( mseq1[k] == '-' )
+ {
+ tmpscore += penalty - n_dis[0][24];
+ // advance k to the first non-gap of mseq1 (the NUL stops the scan) ...
+ while( mseq1[++k] == '-' )
+ ;
+ // ... then step back so the for-loop's k++ lands on that column
+ k--;
+ if( k > len-2 ) break;
+ continue;
+ }
+ if( mseq2[k] == '-' )
+ {
+ tmpscore += penalty - n_dis[0][24];
+ // same run-skipping as above, for a gap run in mseq2
+ while( mseq2[++k] == '-' )
+ ;
+ k--;
+ if( k > len-2 ) break;
+ continue;
+ }
+ }
+ score += (double)tmpscore / (double)c;
+#if DEBUG
+ printf( "tmpscore in mltaln9.c = %f\n", tmpscore );
+ printf( "tmpscore / c = %f\n", tmpscore/(double)c );
+#endif
+ }
+ }
+ reporterr( "raw score = %f\n", score );
+ // normalise by the number of pairs, nseq*(nseq-1)/2
+ score /= (double)nseq * ( nseq-1.0 ) / 2.0;
+ score += 400.0;
+#if DEBUG
+ printf( "score in mltaln9.c = %f\n", score );
+#endif
+ return( (double)score );
+}
+
+/*
+ * doublencpy: copy len doubles from vec2 to vec1, element by element,
+ * starting at index 0 and moving forward.
+ */
+void doublencpy( double *vec1, double *vec2, int len )
+{
+	double *dst = vec1;
+	double *src = vec2;
+	int remaining;
+
+	for( remaining=len; remaining != 0; remaining-- )
+	{
+		*dst = *src;
+		dst++;
+		src++;
+	}
+}
+
+/*
+ * score_calc_a: weighted sum-of-pairs score over the first s rows of seq.
+ *
+ *   seq  alignment rows; the length of seq[0] is used for every row.
+ *   s    number of rows to score.
+ *   eff  pair weights; eff[i][j] is read for i<j.
+ *
+ * For each pair and each column, gb1/gb2 hold whether the previous column
+ * was '-' in row i / row j and gc1/gc2 hold the same for the current column
+ * (the state before column 0 is "no gap").  cob becomes 1 for the ten
+ * (previous,current) gap-state combinations listed below and 0 otherwise;
+ * each such column adds 0.5 * penalty * weight.  amino_dis[a][b] * weight is
+ * added for every column.  nglen counts columns where neither row has '-'.
+ *
+ * Returns score / nglen + 400.0 * !scoremtx.
+ * NOTE(review): nglen == 0 gives a division by zero -- confirm callers.
+ */
+double score_calc_a( char **seq, int s, double **eff ) /* algorithm A+ */
+{
+ int i, j, k;
+ int gb1, gb2, gc1, gc2;
+ int cob;
+ int nglen;
+ int len = strlen( seq[0] );
+ double score;
+
+ score = 0;
+ nglen = 0;
+ for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
+ {
+ double efficient = eff[i][j];
+
+ gc1 = 0;
+ gc2 = 0;
+ for( k=0; k<len; k++ )
+ {
+ // previous column's gap state becomes "before", then read the current one
+ gb1 = gc1;
+ gb2 = gc2;
+
+ gc1 = ( seq[i][k] == '-' );
+ gc2 = ( seq[j][k] == '-' );
+
+ // each product is 1 for exactly one (gb1,gc1,gb2,gc2) combination
+ cob =
+ !gb1 * gc1
+ * !gb2 * !gc2
+
+ + gb1 * !gc1
+ * !gb2 * !gc2
+
+ + !gb1 * !gc1
+ * !gb2 * gc2
+
+ + !gb1 * !gc1
+ * gb2 * !gc2
+
+ + !gb1 * gc1
+ * gb2 * !gc2
+
+ + gb1 * !gc1
+ * !gb2 * gc2
+
+ + gb1 * !gc1
+ * gb2 * gc2
+
+ + gb1 * gc1
+ * gb2 * !gc2
+
+ + !gb1 * gc1
+ * gb2 * gc2
+
+ + gb1 * gc1
+ * !gb2 * gc2
+ ;
+ score += 0.5 * (double)cob * penalty * efficient;
+ score += (double)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]] * (double)efficient;
+ nglen += ( !gc1 * !gc2 );
+ }
+ }
+ return( (double)score / nglen + 400.0 * !scoremtx );
+}
+
+
+/*
+ * score_calc_s: weighted sum-of-pairs score over the first s rows of seq.
+ *
+ *   seq  alignment rows; the length of seq[0] is used for every row.
+ *   s    number of rows to score.
+ *   eff  pair weights; eff[i][j] is read for i<j.
+ *
+ * gb1/gb2 are the gap states ('-' or not) of rows i/j in the previous
+ * column, gc1/gc2 those of the current column (initial state: no gap).
+ * cob is 1 for the six state combinations listed below; each adds
+ * 0.5 * penalty * weight.  The four further combinations that
+ * score_calc_a() counts are not counted here.  amino_dis[a][b] * weight is
+ * added for every column, and nglen counts the columns in which neither
+ * row has a gap.
+ *
+ * Both amino_dis indices are taken through (unsigned char); the second one
+ * used to be cast to (int), which yields a negative index for characters
+ * with the high bit set on platforms where char is signed.
+ *
+ * Returns score / nglen + 400.0.
+ * NOTE(review): nglen == 0 gives a division by zero -- confirm callers.
+ */
+double score_calc_s( char **seq, int s, double **eff ) /* algorithm S, not used */
+{
+	int i, j, k;
+	int gb1, gb2, gc1, gc2;
+	int cob;
+	int nglen;
+	int len = strlen( seq[0] );
+	double score;
+
+	score = 0;
+	nglen = 0;
+	for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
+	{
+		double efficient = eff[i][j];
+
+		gc1 = 0;
+		gc2 = 0;
+		for( k=0; k<len; k++ )
+		{
+			gb1 = gc1;
+			gb2 = gc2;
+
+			gc1 = ( seq[i][k] == '-' );
+			gc2 = ( seq[j][k] == '-' );
+
+			cob =
+				  !gb1 *  gc1 * !gb2 * !gc2
+				+  gb1 * !gc1 * !gb2 * !gc2
+				+ !gb1 * !gc1 * !gb2 *  gc2
+				+ !gb1 * !gc1 *  gb2 * !gc2
+				+ !gb1 *  gc1 *  gb2 * !gc2
+				+  gb1 * !gc1 * !gb2 *  gc2
+				;
+			score += 0.5 * (double)cob * penalty * efficient;
+			score += (double)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]] * (double)efficient;
+			nglen += ( !gc1 * !gc2 );
+		}
+	}
+	return( (double)score / nglen + 400.0 );
+}
+
+/*
+ * score_calc_for_score_s: unweighted sum-of-pairs score over the first s
+ * rows of seq.
+ *
+ *   s    number of rows to score.
+ *   seq  alignment rows; the length of seq[0] is used for every row.
+ *
+ * gb1/gb2 are the gap states ('-' or not) of rows i/j in the previous
+ * column, gc1/gc2 those of the current column (initial state: no gap).
+ * cob is 1 for the six state combinations listed below; each adds
+ * 0.5 * penalty.  amino_dis[a][b] is added for every column, and nglen
+ * counts the columns in which neither row has a gap.
+ *
+ * Both amino_dis indices are taken through (unsigned char); the first one
+ * used to be cast to (int), which yields a negative index for characters
+ * with the high bit set on platforms where char is signed.
+ *
+ * Returns score / nglen + 400.0.
+ * NOTE(review): nglen == 0 gives a division by zero -- confirm callers.
+ */
+double score_calc_for_score_s( int s, char **seq ) /* algorithm S */
+{
+	int i, j, k;
+	int gb1, gb2, gc1, gc2;
+	int cob;
+	int nglen;
+	int len = strlen( seq[0] );
+	double score;
+
+	score = 0;
+	nglen = 0;
+	for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
+	{
+		gc1 = 0;
+		gc2 = 0;
+		for( k=0; k<len; k++ )
+		{
+			gb1 = gc1;
+			gb2 = gc2;
+
+			gc1 = ( seq[i][k] == '-' );
+			gc2 = ( seq[j][k] == '-' );
+
+			cob =
+				  !gb1 *  gc1 * !gb2 * !gc2
+				+  gb1 * !gc1 * !gb2 * !gc2
+				+ !gb1 * !gc1 * !gb2 *  gc2
+				+ !gb1 * !gc1 *  gb2 * !gc2
+				+ !gb1 *  gc1 *  gb2 * !gc2
+				+  gb1 * !gc1 * !gb2 *  gc2
+				;
+			score += 0.5 * (double)cob * penalty;
+			score += (double)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]];
+			nglen += ( !gc1 * !gc2 );
+		}
+	}
+	return( (double)score / nglen + 400.0 );
+}
+
+/*
+ * SSPscore___: score of row ex against every other row of seq.
+ *
+ *   s    number of rows.
+ *   seq  alignment rows; the length of seq[0] is used for every row.
+ *   ex   index of the row that is paired with all rows j != ex.
+ *
+ * gb1/gb2 are the gap states of rows ex/j in the previous column, gc1/gc2
+ * those of the current column (initial state: no gap).  cob is 1 for the
+ * first four state combinations below and 2 for the last two active ones
+ * (the products are multiplied by 2.0 and the sum is stored in an int).
+ * Each column adds 0.5 * cob * penalty plus amino_dis[a][b].
+ *
+ * Returns the raw accumulated score; nglen is computed but not used.
+ */
+double SSPscore___( int s, char **seq, int ex ) /* algorithm S */
+{
+ int i, j, k;
+ int gb1, gb2, gc1, gc2;
+ int cob;
+ int nglen;
+ int len = strlen( seq[0] );
+ double score;
+
+ score = 0;
+ nglen = 0;
+ // i stays fixed at ex; only j iterates
+ i=ex; for( j=0; j<s; j++ )
+ {
+
+ if( j == ex ) continue;
+
+ gc1 = 0;
+ gc2 = 0;
+ for( k=0; k<len; k++ )
+ {
+ gb1 = gc1;
+ gb2 = gc2;
+
+ gc1 = ( seq[i][k] == '-' );
+ gc2 = ( seq[j][k] == '-' );
+
+ cob =
+ !gb1 * gc1
+ * !gb2 * !gc2
+
+ + gb1 * !gc1
+ * !gb2 * !gc2
+
+ + !gb1 * !gc1
+ * !gb2 * gc2
+
+ + !gb1 * !gc1
+ * gb2 * !gc2
+
+ + !gb1 * gc1
+ * gb2 * !gc2 * 2.0
+
+ + gb1 * !gc1
+ * !gb2 * gc2 * 2.0
+
+#if 0
+ + gb1 * !gc1
+ * gb2 * gc2
+
+ + gb1 * gc1
+ * gb2 * !gc2
+
+ + !gb1 * gc1
+ * gb2 * gc2
+
+ + gb1 * gc1
+ * !gb2 * gc2
+#endif
+ ;
+ score += 0.5 * (double)cob * penalty;
+ score += (double)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]];
+ nglen += ( !gc1 * !gc2 ); /* not used */
+ }
+#if 0
+ reporterr( "i = %d, j=%d\n", i+1, j+1 );
+ reporterr( "score = %f\n", score );
+#endif
+ }
+ return( (double)score );
+}
+
+/*
+ * SSPscore: unweighted, unnormalised sum-of-pairs score over the first s
+ * rows of seq.
+ *
+ *   s    number of rows to score.
+ *   seq  alignment rows; the length of seq[0] is used for every row.
+ *
+ * gb1/gb2 are the gap states of rows i/j in the previous column, gc1/gc2
+ * those of the current column (initial state: no gap).  cob is 1 for the
+ * six active state combinations below; each adds 0.5 * penalty.
+ * amino_dis[a][b] is added for every column.
+ *
+ * Returns the raw accumulated score; nglen is computed but not used.
+ */
+double SSPscore( int s, char **seq ) /* algorithm S */
+{
+ int i, j, k;
+ int gb1, gb2, gc1, gc2;
+ int cob;
+ int nglen;
+ int len = strlen( seq[0] );
+ double score;
+
+ score = 0;
+ nglen = 0;
+ for( i=0; i<s-1; i++ ) for( j=i+1; j<s; j++ )
+ {
+
+ gc1 = 0;
+ gc2 = 0;
+ for( k=0; k<len; k++ )
+ {
+ gb1 = gc1;
+ gb2 = gc2;
+
+ gc1 = ( seq[i][k] == '-' );
+ gc2 = ( seq[j][k] == '-' );
+
+ cob =
+ !gb1 * gc1
+ * !gb2 * !gc2
+
+ + gb1 * !gc1
+ * !gb2 * !gc2
+
+ + !gb1 * !gc1
+ * !gb2 * gc2
+
+ + !gb1 * !gc1
+ * gb2 * !gc2
+
+ + !gb1 * gc1
+ * gb2 * !gc2
+
+ + gb1 * !gc1
+ * !gb2 * gc2
+
+#if 0
+ + gb1 * !gc1
+ * gb2 * gc2
+
+ + gb1 * gc1
+ * gb2 * !gc2
+
+ + !gb1 * gc1
+ * gb2 * gc2
+
+ + gb1 * gc1
+ * !gb2 * gc2
+#endif
+ ;
+ score += 0.5 * (double)cob * penalty;
+ score += (double)amino_dis[(unsigned char)seq[i][k]][(unsigned char)seq[j][k]];
+ nglen += ( !gc1 * !gc2 ); /* not used */
+ }
+#if 0
+ reporterr( "i = %d, j=%d\n", i+1, j+1 );
+ reporterr( "score = %f\n", score );
+#endif
+ }
+ return( (double)score );
+}
+
+
+
+/*
+ * DSPscore: unnormalised sum-of-pairs score over the first s rows of seq.
+ *
+ *   s    number of rows to score.
+ *   seq  alignment rows; the length of seq[0] is used for every row.
+ *
+ * For every pair i<j, columns where both rows hold '-' are skipped.  Every
+ * other visited column adds amino_dis[a][b].  When one row has '-' at the
+ * visited column, penalty is added once and the rest of that row's gap run
+ * is consumed inside the while loop, adding amino_dis[a][b] for each further
+ * gap column of the run.
+ *
+ * Returns the sum of the pair totals.  The local c is set but never read.
+ */
+double DSPscore( int s, char **seq ) /* not method 3 */
+{
+ int i, j, k;
+ double c;
+ int len = strlen( seq[0] );
+ double score;
+ double tmpscore;
+ char *mseq1, *mseq2;
+#if DEBUG
+ FILE *fp;
+#endif
+
+ score = 0.0;
+ c = 0.0;
+
+ for( i=0; i<s-1; i++ )
+ {
+ for( j=i+1; j<s; j++ )
+ {
+ mseq1 = seq[i];
+ mseq2 = seq[j];
+ tmpscore = 0.0;
+ for( k=0; k<len; k++ )
+ {
+ if( mseq1[k] == '-' && mseq2[k] == '-' ) continue;
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
+
+ if( mseq1[k] == '-' )
+ {
+ tmpscore += penalty;
+ // walk the rest of the gap run in mseq1, scoring each gap column
+ while( mseq1[++k] == '-' )
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
+ // step back so the for-loop's k++ revisits the first non-gap column
+ k--;
+ if( k > len-2 ) break;
+ continue;
+ }
+ if( mseq2[k] == '-' )
+ {
+ tmpscore += penalty;
+ // same as above for a gap run in mseq2
+ while( mseq2[++k] == '-' )
+ tmpscore += amino_dis[(unsigned char)mseq1[k]][(unsigned char)mseq2[k]];
+ k--;
+ if( k > len-2 ) break;
+ continue;
+ }
+ }
+ score += (double)tmpscore;
+ }
+ }
+
+ return( score );
+}
+
+
+#define SEGMENTSIZE 150
+
+/*
+ * searchAnchors: find runs of high-scoring windows in an alignment.
+ *
+ *   nseq  number of rows in seq.
+ *   seq   alignment rows; the length of seq[0] is used for every row.
+ *   seg   output array of Segment; one entry is filled per run found.
+ *
+ * Step 1: stra[i] is the mean over all row pairs of
+ *         n_dis[amino_n[a]][amino_n[b]] at column i.
+ * Step 2: a window of divWinSize columns slides from i=1 to
+ *         len-divWinSize-1; its score is the sum of stra over the window.
+ *         A run opens when the window score exceeds threshold and closes
+ *         when it no longer does or when the run has grown beyond
+ *         SEGMENTSIZE windows.  A run closed for length sets skipForeward
+ *         on its segment and skipBackward on the next one.
+ *
+ * stra is a static buffer that only grows; threshold is computed once, the
+ * first time the buffer is allocated.
+ *
+ * Returns the number of segments written.  Calls ErrorExit() when more than
+ * MAXSEG - 3 segments have been produced.
+ *
+ * The initial window sum is clamped to len: previously an alignment shorter
+ * than divWinSize read past the end of stra (or through a NULL stra when
+ * len was 0 on the first call).  The result for such input is unchanged
+ * (no window is evaluated, 0 is returned).
+ *
+ * NOTE(review): nseq < 2 makes the per-column mean divide by zero.
+ */
+int searchAnchors( int nseq, char **seq, Segment *seg )
+{
+	int i, j, k, kcyc;
+	int status;
+	double score;
+	int value = 0;
+	int len;
+	int length;
+	static double *stra = NULL;
+	static int alloclen = 0;
+	double cumscore;
+	static double threshold;
+
+	len = strlen( seq[0] );
+	if( alloclen < len )
+	{
+		if( alloclen )
+		{
+			FreeDoubleVec( stra );
+		}
+		else
+		{
+			threshold = (int)divThreshold / 100.0 * 600.0 * divWinSize;
+		}
+		stra = AllocateDoubleVec( len );
+		alloclen = len;
+	}
+
+	/* per-column mean pairwise score */
+	for( i=0; i<len; i++ )
+	{
+		stra[i] = 0.0;
+		kcyc = nseq-1;
+		for( k=0; k<kcyc; k++ ) for( j=k+1; j<nseq; j++ )
+			stra[i] += n_dis[(int)amino_n[(unsigned char)seq[k][i]]][(int)amino_n[(unsigned char)seq[j][i]]];
+		stra[i] /= (double)nseq * ( nseq-1 ) / 2;
+	}
+
+	(seg+0)->skipForeward = 0;
+	(seg+1)->skipBackward = 0;
+	status = 0;
+	cumscore = 0.0;
+	score = 0.0;
+	length = 0; /* modified at 01/09/11 */
+	/* score of the window starting at column 0; never read past len */
+	for( j=0; j<divWinSize && j<len; j++ ) score += stra[j];
+	for( i=1; i<len-divWinSize; i++ )
+	{
+		/* slide the window one column to the right */
+		score = score - stra[i-1] + stra[i+divWinSize-1];
+#if DEBUG
+		reporterr( "%d %f ? %f", i, score, threshold );
+		if( score > threshold ) reporterr( "YES\n" );
+		else reporterr( "NO\n" );
+#endif
+
+		if( score > threshold )
+		{
+			if( !status )
+			{
+				status = 1;
+				seg->start = i;
+				length = 0;
+				cumscore = 0.0;
+			}
+			length++;
+			cumscore += score;
+		}
+		if( score <= threshold || length > SEGMENTSIZE )
+		{
+			if( status )
+			{
+				seg->end = i;
+				seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
+				seg->score = cumscore;
+#if DEBUG
+				reporterr( "%d-%d length = %d\n", seg->start, seg->end, length );
+#endif
+				if( length > SEGMENTSIZE )
+				{
+					(seg+0)->skipForeward = 1;
+					(seg+1)->skipBackward = 1;
+				}
+				else
+				{
+					(seg+0)->skipForeward = 0;
+					(seg+1)->skipBackward = 0;
+				}
+				length = 0;
+				cumscore = 0.0;
+				status = 0;
+				value++;
+				seg++;
+				if( value > MAXSEG - 3 ) ErrorExit( "TOO MANY SEGMENTS!");
+			}
+		}
+	}
+	/* a run still open at the end of the scan is closed here */
+	if( status )
+	{
+		seg->end = i;
+		seg->center = ( seg->start + seg->end + divWinSize ) / 2 ;
+		seg->score = cumscore;
+#if DEBUG
+		reporterr( "%d-%d length = %d\n", seg->start, seg->end, length );
+#endif
+		value++;
+	}
+	return( value );
+}
+
+/*
+ * dontcalcimportance_target: set the importance of every local-homology
+ * entry of the first ntarget rows to opt / overlapaa.
+ *
+ *   nseq      number of columns walked in each localhom row.
+ *   eff       unused.
+ *   seq       unused.
+ *   localhom  localhom[i]+j is the head of a list linked through ->next.
+ *   ntarget   number of rows of localhom that are processed.
+ *
+ * The per-sequence ungapped-length table that used to be allocated, filled
+ * and freed here was only read by a disabled (#else) alternative, so it has
+ * been removed together with that alternative.
+ *
+ * NOTE(review): overlapaa is used as a divisor without a zero check.
+ */
+void dontcalcimportance_target( int nseq, double *eff, char **seq, LocalHom **localhom, int ntarget )
+{
+	int i, j;
+	LocalHom *ptr;
+
+	(void)eff;
+	(void)seq;
+
+	for( i=0; i<ntarget; i++ )
+	{
+		for( j=0; j<nseq; j++ )
+		{
+			for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
+			{
+				ptr->importance = ptr->opt / ptr->overlapaa;
+			}
+		}
+	}
+}
+/*
+ * dontcalcimportance: set the importance of every local-homology entry to
+ * opt / overlapaa.
+ *
+ *   nseq      number of rows and columns of localhom that are walked.
+ *   eff       unused.
+ *   seq       unused.
+ *   localhom  localhom[i]+j is the head of a list linked through ->next.
+ *
+ * The per-sequence ungapped-length table that used to be allocated, filled
+ * and freed here was only read by a disabled (#else) alternative, so it has
+ * been removed together with that alternative.
+ *
+ * NOTE(review): overlapaa is used as a divisor without a zero check.
+ */
+void dontcalcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
+{
+	int i, j;
+	LocalHom *ptr;
+
+	(void)eff;
+	(void)seq;
+
+	for( i=0; i<nseq; i++ )
+	{
+		for( j=0; j<nseq; j++ )
+		{
+			for( ptr=localhom[i]+j; ptr; ptr=ptr->next )
+			{
+				ptr->importance = ptr->opt / ptr->overlapaa;
+			}
+		}
+	}
+}
+
+/*
+ * dontcalcimportance_firstone: for rows 0 .. nseq-2 of localhom, walk the
+ * list that starts at column 0 and set each entry's importance to half of
+ * its opt value.  eff and seq are not read.
+ */
+void dontcalcimportance_firstone( int nseq, double *eff, char **seq, LocalHom **localhom )
+{
+	int row;
+	int lastrow = nseq - 1;
+	LocalHom *hit;
+
+	for( row=0; row<lastrow; row++ )
+	{
+		hit = localhom[row];		/* list head at column 0 */
+		while( hit )
+		{
+			hit->importance = hit->opt * 0.5;	/* ad hoc factor */
+			hit = hit->next;
+		}
+	}
+}
+
+/*
+ * calcimportance_target: assign an importance value to every
+ * local-homology entry of the target rows.
+ *
+ *   nseq        number of sequences.
+ *   ntarget     number of rows in localhom.
+ *   eff         per-sequence weights (nseq entries).
+ *   seq         sequences; only seqlen( seq[i] ) is used.
+ *   localhom    localhom[ti]+j is the head of the list for target row ti
+ *               against sequence j, linked through ->next.
+ *   targetmap   targetmap[j] != -1 marks sequence j as a target.
+ *   targetmapr  targetmapr[ti] is the sequence index of target row ti.
+ *
+ * Steps:
+ *   1. ieff[] = eff[] with sequences of ungapped length 0 set to 0, then
+ *      normalised to sum 1.
+ *   2. For each target row, importance[pos] accumulates ieff[j] over every
+ *      entry (opt != -1) whose start1..end1 range covers pos; each entry's
+ *      importance becomes mean(importance[start1..end1]) * opt.
+ *   3. For each pair of targets the two mirrored lists are walked in step
+ *      and both entries receive the mean of their importances.
+ *   4. Entries of a target against a non-target sequence are halved.
+ *
+ * The normalised weights used to be printed unconditionally to stdout;
+ * they are now a DEBUG-only diagnostic sent through reporterr(), in line
+ * with the other diagnostics in this file.
+ *
+ * NOTE(review): totaleff == 0, or an entry with start1 > end1, leads to a
+ * division by zero; start1/end1 are assumed to lie inside [0, nlenmax).
+ */
+void calcimportance_target( int nseq, int ntarget, double *eff, char **seq, LocalHom **localhom, int *targetmap, int *targetmapr )
+{
+	int i, j, pos, len, ti, tj;
+	double *importance; // static -> local, 2012/02/25
+	double tmpdouble;
+	double *ieff, totaleff;
+	int *nogaplen; // static -> local, 2012/02/25
+	LocalHom *tmpptr;
+
+	importance = AllocateDoubleVec( nlenmax );
+	nogaplen = AllocateIntVec( nseq );
+	ieff = AllocateDoubleVec( nseq );
+
+	/* step 1: normalised weights */
+	totaleff = 0.0;
+	for( i=0; i<nseq; i++ )
+	{
+		nogaplen[i] = seqlen( seq[i] );
+		if( nogaplen[i] == 0 ) ieff[i] = 0.0;
+		else ieff[i] = eff[i];
+		totaleff += ieff[i];
+	}
+	for( i=0; i<nseq; i++ ) ieff[i] /= totaleff;
+#if DEBUG
+	for( i=0; i<nseq; i++ ) reporterr( "eff[%d] = %30.25f\n", i, ieff[i] );
+#endif
+
+	/* step 2: position-specific importance per target row */
+	for( ti=0; ti<ntarget; ti++ )
+	{
+		i = targetmapr[ti];
+		for( pos=0; pos<nlenmax; pos++ )
+			importance[pos] = 0.0;
+		for( j=0; j<nseq; j++ )
+		{
+			if( i == j ) continue;
+			for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next )
+			{
+				if( tmpptr->opt == -1 ) continue;
+				for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
+					importance[pos] += ieff[j];
+			}
+		}
+		for( j=0; j<nseq; j++ )
+		{
+			if( i == j ) continue;
+			if( localhom[ti][j].opt == -1.0 ) continue;
+			for( tmpptr = localhom[ti]+j; tmpptr; tmpptr=tmpptr->next )
+			{
+				if( tmpptr->opt == -1.0 ) continue;
+				tmpdouble = 0.0;
+				len = 0;
+				for( pos=tmpptr->start1; pos<=tmpptr->end1; pos++ )
+				{
+					tmpdouble += importance[pos];
+					len++;
+				}
+
+				tmpdouble /= (double)len;
+
+				tmpptr->importance = tmpdouble * tmpptr->opt;
+			}
+		}
+	}
+
+	/* step 3: average the two directions of each target-target pair */
+	for( ti=0; ti<ntarget; ti++ ) for( tj=ti+1; tj<ntarget; tj++ )
+	{
+		double imp;
+		LocalHom *tmpptr1, *tmpptr2;
+
+		i = targetmapr[ti];
+		j = targetmapr[tj];
+
+		tmpptr1 = localhom[ti]+j; tmpptr2 = localhom[tj]+i;
+		for( ; tmpptr1 && tmpptr2; tmpptr1 = tmpptr1->next, tmpptr2 = tmpptr2->next)
+		{
+			if( tmpptr1->opt == -1.0 || tmpptr2->opt == -1.0 ) continue;
+			imp = 0.5 * ( tmpptr1->importance + tmpptr2->importance );
+			tmpptr1->importance = tmpptr2->importance = imp;
+		}
+	}
+
+	/* step 4: target vs. non-target entries have no mirror; halve them */
+	for( ti=0; ti<ntarget; ti++ ) for( j=0; j<nseq; j++ )
+	{
+		double imp;
+		LocalHom *tmpptr1;
+
+		i = targetmapr[ti];
+		if( i == j ) continue;
+		if( targetmap[j] != -1 ) continue;
+
+		tmpptr1 = localhom[ti]+j;
+		for( ; tmpptr1; tmpptr1 = tmpptr1->next )
+		{
+			if( tmpptr1->opt == -1.0 ) continue;
+			imp = 0.5 * ( tmpptr1->importance );
+			tmpptr1->importance = imp;
+		}
+	}
+
+	free( importance );
+	free( nogaplen );
+	free( ieff );
+}
+
+/*
+ * calcimportance_half: importance calculation for a half (triangular)
+ * local-homology table, where the list for the pair a<b starts at
+ * localhom[a]+(b-a).  The same entry serves both directions: start1/end1
+ * and ->importance belong to the lower-indexed sequence, start2/end2 and
+ * ->rimportance to the higher-indexed one.
+ *
+ *   1. Weights: eff[] with sequences of ungapped length 0 set to 0,
+ *      normalised to sum 1.
+ *   2. For each sequence a, colweight[col] accumulates the weight of every
+ *      partner b for each entry (opt != -1) whose range on a covers col;
+ *      then each entry gets mean(colweight over its range on a) * opt,
+ *      stored in importance (a<b) or rimportance (a>b).
+ *   3. importance and rimportance of every entry are replaced by their mean.
+ */
+void calcimportance_half( int nseq, double *eff, char **seq, LocalHom **localhom )
+{
+	int a, b, col, ncol;
+	double *colweight;
+	double acc;
+	double *normeff, effsum;
+	int *reslen;
+	LocalHom *hit;
+
+	colweight = AllocateDoubleVec( nlenmax );
+	reslen = AllocateIntVec( nseq );
+	normeff = AllocateDoubleVec( nseq );
+
+	/* 1. normalised weights */
+	effsum = 0.0;
+	for( a=0; a<nseq; a++ )
+	{
+		reslen[a] = seqlen( seq[a] );
+		normeff[a] = ( reslen[a] == 0 ) ? 0.0 : eff[a];
+		effsum += normeff[a];
+	}
+	for( a=0; a<nseq; a++ ) normeff[a] /= effsum;
+
+	/* 2. per-sequence column weights, then per-entry importance */
+	for( a=0; a<nseq; a++ )
+	{
+		for( col=0; col<nlenmax; col++ ) colweight[col] = 0.0;
+
+		for( b=0; b<nseq; b++ )
+		{
+			if( a == b ) continue;
+
+			if( a < b )
+			{
+				for( hit=localhom[a]+b-a; hit; hit=hit->next )
+				{
+					if( hit->opt == -1 ) continue;
+					for( col=hit->start1; col<=hit->end1; col++ )
+						colweight[col] += normeff[b];
+				}
+			}
+			else
+			{
+				for( hit=localhom[b]+a-b; hit; hit=hit->next )
+				{
+					if( hit->opt == -1 ) continue;
+					for( col=hit->start2; col<=hit->end2; col++ )
+						colweight[col] += normeff[b];
+				}
+			}
+		}
+
+		for( b=0; b<nseq; b++ )
+		{
+			if( a == b ) continue;
+
+			if( a < b )
+			{
+				if( localhom[a][b-a].opt == -1.0 ) continue;
+
+				for( hit=localhom[a]+b-a; hit; hit=hit->next )
+				{
+					if( hit->opt == -1.0 ) continue;
+					acc = 0.0;
+					ncol = 0;
+					for( col=hit->start1; col<=hit->end1; col++ )
+					{
+						acc += colweight[col];
+						ncol++;
+					}
+					acc /= (double)ncol;
+					hit->importance = acc * hit->opt;
+				}
+			}
+			else
+			{
+				if( localhom[b][a-b].opt == -1.0 ) continue;
+
+				for( hit=localhom[b]+a-b; hit; hit=hit->next )
+				{
+					if( hit->opt == -1.0 ) continue;
+					acc = 0.0;
+					ncol = 0;
+					for( col=hit->start2; col<=hit->end2; col++ )
+					{
+						acc += colweight[col];
+						ncol++;
+					}
+					acc /= (double)ncol;
+					hit->rimportance = acc * hit->opt;
+				}
+			}
+		}
+	}
+
+	/* 3. average the two directions stored in each entry */
+	for( a=0; a<nseq-1; a++ )
+	{
+		for( b=a+1; b<nseq; b++ )
+		{
+			for( hit=localhom[a]+b-a; hit; hit=hit->next )
+			{
+				if( hit->opt == -1.0 ) continue;
+				acc = 0.5 * ( hit->importance + hit->rimportance );
+				hit->importance = hit->rimportance = acc;
+			}
+		}
+	}
+
+	free( colweight );
+	free( reslen );
+	free( normeff );
+}
+
+/*
+ * calcimportance: importance calculation for a full nseq x nseq
+ * local-homology table, where localhom[a]+b is the head of the list for
+ * sequence a against sequence b.
+ *
+ *   1. Weights: eff[] with sequences of ungapped length 0 set to 0,
+ *      normalised to sum 1.
+ *   2. For each sequence a, colweight[col] accumulates the weight of every
+ *      partner b for each entry (opt != -1) whose start1..end1 range covers
+ *      col; then each entry's importance becomes
+ *      mean(colweight[start1..end1]) * opt.
+ *   3. The lists for (a,b) and (b,a) are walked in step and both entries
+ *      receive the mean of their two importances.
+ */
+void calcimportance( int nseq, double *eff, char **seq, LocalHom **localhom )
+{
+	int a, b, col, ncol;
+	double *colweight;
+	double acc;
+	double *normeff, effsum;
+	int *reslen;
+	LocalHom *hit;
+
+	colweight = AllocateDoubleVec( nlenmax );
+	reslen = AllocateIntVec( nseq );
+	normeff = AllocateDoubleVec( nseq );
+
+	/* 1. normalised weights */
+	effsum = 0.0;
+	for( a=0; a<nseq; a++ )
+	{
+		reslen[a] = seqlen( seq[a] );
+		normeff[a] = ( reslen[a] == 0 ) ? 0.0 : eff[a];
+		effsum += normeff[a];
+	}
+	for( a=0; a<nseq; a++ ) normeff[a] /= effsum;
+
+	/* 2. per-sequence column weights, then per-entry importance */
+	for( a=0; a<nseq; a++ )
+	{
+		for( col=0; col<nlenmax; col++ ) colweight[col] = 0.0;
+
+		for( b=0; b<nseq; b++ )
+		{
+			if( a == b ) continue;
+			for( hit=localhom[a]+b; hit; hit=hit->next )
+			{
+				if( hit->opt == -1 ) continue;
+				for( col=hit->start1; col<=hit->end1; col++ )
+					colweight[col] += normeff[b];
+			}
+		}
+
+		for( b=0; b<nseq; b++ )
+		{
+			if( a == b ) continue;
+			if( localhom[a][b].opt == -1.0 ) continue;
+
+			for( hit=localhom[a]+b; hit; hit=hit->next )
+			{
+				if( hit->opt == -1.0 ) continue;
+				acc = 0.0;
+				ncol = 0;
+				for( col=hit->start1; col<=hit->end1; col++ )
+				{
+					acc += colweight[col];
+					ncol++;
+				}
+				acc /= (double)ncol;
+				hit->importance = acc * hit->opt;
+			}
+		}
+	}
+
+	/* 3. average the two directions of every pair */
+	for( a=0; a<nseq-1; a++ )
+	{
+		for( b=a+1; b<nseq; b++ )
+		{
+			LocalHom *fwd = localhom[a]+b;
+			LocalHom *rev = localhom[b]+a;
+
+			for( ; fwd && rev; fwd=fwd->next, rev=rev->next )
+			{
+				if( fwd->opt == -1.0 || rev->opt == -1.0 ) continue;
+				acc = 0.5 * ( fwd->importance + rev->importance );
+				fwd->importance = rev->importance = acc;
+			}
+		}
+	}
+
+	free( colweight );
+	free( reslen );
+	free( normeff );
+}
+
+
+
+
+/*
+ * addlocalhom2_e: record one local-homology segment in the list headed by lh.
+ *
+ *   pt       node to extend from (callers in this file pass lh->last).
+ *   lh       head node of the list; its ->last and ->nokori are updated.
+ *   sti,eni  start/end stored in start1/end1.
+ *   stj,enj  start/end stored in start2/end2.
+ *   opt      stored in ->opt.
+ *   overlp   stored in ->overlapaa.
+ *   interm   stored in ->extended.
+ *
+ * When pt is the head itself the head node is filled in place; otherwise a
+ * zero-initialised node is appended after pt and filled.  lh->last points
+ * at the node that was filled and lh->nokori is incremented.
+ *
+ * The allocation is now checked: on failure ErrorExit() is called instead
+ * of dereferencing a NULL pointer.
+ */
+static void addlocalhom2_e( LocalHom *pt, LocalHom *lh, int sti, int stj, int eni, int enj, double opt, int overlp, int interm )
+{
+// original author's note: something is wrong somewhere
+	if( pt != lh ) // advance: append a new node
+	{
+		pt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+		if( pt->next == NULL ) ErrorExit( "Cannot allocate LocalHom in addlocalhom2_e" );
+		pt = pt->next;
+		pt->next = NULL;
+		lh->last = pt;
+	}
+	else // use the head node as it is
+	{
+		lh->last = pt;
+	}
+	lh->nokori++;
+
+	pt->start1 = sti;
+	pt->start2 = stj;
+	pt->end1 = eni;
+	pt->end2 = enj;
+	pt->opt = opt;
+	pt->extended = interm;
+	pt->overlapaa = overlp;
+}
+
+/*
+ * extendlocalhom2: derive additional local-homology segments for each pair
+ * (i,j) through a third sequence k.
+ *
+ *   nseq      number of sequences.
+ *   localhom  full table; localhom[a]+b is the head of the list for a vs b.
+ *   dist      distances, read as dist[MIN(a,b)][MAX(a,b)].
+ *
+ * For every i<j and every k other than i and j whose distance to both i and
+ * j is at most dist[i][j] * thrinter:
+ *   - ini[pk] / inj[pk] are filled with the position in i / j that the
+ *     entries of localhom[i][k] / localhom[j][k] pair with position pk of k
+ *     (-1 where nothing is recorded).  Only entries before the first one
+ *     with extended > -1 are used.
+ *   - overlp counts positions pk where both ini and inj are set.
+ *   - every maximal run of pk where both are set and both increase by
+ *     exactly 1 is appended to localhom[i][j] and, mirrored, to
+ *     localhom[j][i] via addlocalhom2_e(), with
+ *     opt = MIN( localhom[i][k].opt, localhom[j][k].opt ),
+ *     overlapaa = overlp and extended = k.
+ *
+ * ini and inj are static buffers of nlenmax+1 ints allocated on first call.
+ * NOTE(review): they are not reallocated if nlenmax grows later, and the
+ * fill loops assume start2 + (end1-start1) stays within nlenmax -- confirm.
+ */
+void extendlocalhom2( int nseq, LocalHom **localhom, double **dist )
+{
+ int overlp, plim;
+ int i, j, k;
+ int pi, pj, pk, len;
+ int status, sti, stj;
+ int *ipt;
+ int co;
+ static int *ini = NULL;
+ static int *inj = NULL;
+ LocalHom *pt;
+
+ sti = 0; // by D.Mathog, a guess
+ stj = 0; // by D.Mathog, a guess
+
+ if( ini == NULL )
+ {
+ ini = AllocateIntVec( nlenmax+1 );
+ inj = AllocateIntVec( nlenmax+1 );
+ }
+
+
+ for( i=0; i<nseq-1; i++ )
+ {
+ for( j=i+1; j<nseq; j++ )
+ {
+#if 0
+ for( k=0; k<nseq; k++ ) sai[k] = 0;
+ numint = ncons;
+ while( 1 )
+ {
+ k = (int)( rnd() * nseq );
+ if( k == i || k == j ) continue; // better to also skip the ones already done
+ if( numint-- == 0 ) break;
+ if( sai[k] ) continue;
+ sai[k] = 1;
+#else
+ // active variant: try every k as the intermediate sequence
+ for( k=0; k<nseq; k++ )
+ {
+#endif
+// reporterr( "i=%d, j=%d, k=%d, dists = %f,%f,%f thrinter=%f\n", i, j, k, dist[i][j], dist[MIN(i,k)][MAX(i,k)], dist[MIN(j,k)][MAX(j,k)], thrinter );
+ if( k == i || k == j ) continue; // better to also skip the ones already done
+ if( dist[MIN(i,k)][MAX(i,k)] > dist[i][j] * thrinter || dist[MIN(j,k)][MAX(j,k)] > dist[i][j] * thrinter ) continue;
+ // reset both position maps to "unset"
+ ipt = ini; co = nlenmax+1;
+ while( co-- ) *ipt++ = -1;
+ ipt = inj; co = nlenmax+1;
+ while( co-- ) *ipt++ = -1;
+ overlp = 0;
+
+ {
+ // ini[position in k] = paired position in i
+ for( pt=localhom[i]+k; pt; pt=pt->next )
+ {
+// reporterr( "i=%d,k=%d,st1:st2=%d:%d,pt=%p,extended=%p\n", i, k, pt->start1, pt->start2, pt, pt->extended );
+ if( pt->opt == -1 )
+ {
+ reporterr( "opt kainaide tbfast.c = %f\n", pt->opt );
+ }
+ if( pt->extended > -1 ) break;
+ pi = pt->start1;
+ pk = pt->start2;
+ len = pt->end1 - pt->start1 + 1;
+ ipt = ini + pk;
+ while( len-- ) *ipt++ = pi++;
+ }
+ }
+
+ {
+ // inj[position in k] = paired position in j
+ for( pt=localhom[j]+k; pt; pt=pt->next )
+ {
+ if( pt->opt == -1 )
+ {
+ reporterr( "opt kainaide tbfast.c = %f\n", pt->opt );
+ }
+ if( pt->extended > -1 ) break;
+ pj = pt->start1;
+ pk = pt->start2;
+ len = pt->end1 - pt->start1 + 1;
+ ipt = inj + pk;
+ while( len-- ) *ipt++ = pj++;
+ }
+ }
+#if 0
+ reporterr( "i=%d,j=%d,k=%d\n", i, j, k );
+ overlp = 0;
+ for( pk = 0; pk < nlenmax; pk++ )
+ {
+ if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
+ reporterr( " %d", inj[pk] );
+ }
+ reporterr( "\n" );
+
+ reporterr( "i=%d,j=%d,k=%d\n", i, j, k );
+ overlp = 0;
+ for( pk = 0; pk < nlenmax; pk++ )
+ {
+ if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
+ reporterr( " %d", ini[pk] );
+ }
+ reporterr( "\n" );
+#endif
+ // number of positions of k that are paired in both i and j
+ overlp = 0;
+ plim = nlenmax+1;
+ for( pk = 0; pk < plim; pk++ )
+ if( ini[pk] != -1 && inj[pk] != -1 ) overlp++;
+
+
+ // status == 1 while inside a run that started at (sti, stj)
+ status = 0;
+ plim = nlenmax+1;
+ for( pk=0; pk<plim; pk++ )
+ {
+// reporterr( "%d %d: %d-%d\n", i, j, ini[pk], inj[pk] );
+ if( status )
+ {
+ if( ini[pk] == -1 || inj[pk] == -1 || ini[pk-1] != ini[pk] - 1 || inj[pk-1] != inj[pk] - 1 ) // run ends: finalise it
+ {
+ status = 0;
+// reporterr( "end here!\n" );
+
+ pt = localhom[i][j].last;
+// reporterr( "in ex (ba), pt = %p, nokori=%d, i,j,k=%d,%d,%d\n", pt, localhom[i][j].nokori, i, j, k );
+ addlocalhom2_e( pt, localhom[i]+j, sti, stj, ini[pk-1], inj[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
+// reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
+
+ pt = localhom[j][i].last;
+// reporterr( "in ex (ba), pt = %p, pt->next = %p\n", pt, pt->next );
+// reporterr( "in ex (ba), pt = %p, pt->next = %p, k=%d\n", pt, pt->next, k );
+ addlocalhom2_e( pt, localhom[j]+i, stj, sti, inj[pk-1], ini[pk-1], MIN( localhom[i][k].opt, localhom[j][k].opt ) * 1.0, overlp, k );
+// reporterr( "in ex, pt = %p, pt->next = %p, pt->next->next = %p\n", pt, pt->next, pt->next->next );
+ }
+ }
+ if( !status ) // deliberately not an else: a run that just ended may restart at this pk
+ {
+ if( ini[pk] == -1 || inj[pk] == -1 ) continue;
+ sti = ini[pk];
+ stj = inj[pk];
+// reporterr( "start here!\n" );
+ status = 1;
+ }
+ }
+// if( status ) reporterr( "end here\n" );
+
+// exit( 1 );
+// fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
+ }
+#if 0
+ for( pt=localhomtable[i]+j; pt; pt=pt->next )
+ {
+ if( tmpptr->opt == -1.0 ) continue;
+ fprintf( hat3p, "%d %d %d %6.3f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->next );
+ }
+#endif
+ }
+ }
+}
+
+int makelocal( char *s1, char *s2, int thr ) // Find the best-scoring local run of the aligned pair s1/s2, cut both strings after it, and return the offset where it starts.
+{
+ int start, maxstart, maxend; // start: offset of the current run; maxstart/maxend: bounds recorded for the best run
+ char *pt1, *pt2;
+ double score;
+ double maxscore;
+
+ pt1 = s1;
+ pt2 = s2;
+
+ maxend = 0; // by D.Mathog, a guess
+
+// reporterr( "thr = %d, \ns1 = %s\ns2 = %s\n", thr, s1, s2 );
+ maxscore = 0.0;
+ score = 0.0;
+ start = 0;
+ maxstart = 0;
+ while( *pt1 )
+ {
+// reporterr( "*pt1 = %c*pt2 = %c\n", *pt1, *pt2 );
+ if( *pt1 == '-' || *pt2 == '-' )
+ {
+// reporterr( "penalty = %d\n", penalty );
+ score += penalty; // penalty (defined outside this block) is added once per run of gap columns
while( *pt1 == '-' || *pt2 == '-' )
{
- pt1++; pt2++;
+ pt1++; pt2++;
+ }
+ continue;
+ }
+
+ score += ( amino_dis[(unsigned char)*pt1++][(unsigned char)*pt2++] - thr ); // substitution score of this column minus the threshold thr
+// score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
+ if( score > maxscore )
+ {
+// reporterr( "score = %f\n", score );
+ maxscore = score;
+ maxstart = start;
+// reporterr( "## max! maxstart = %d, start = %d\n", maxstart, start );
+ }
+ if( score < 0.0 ) // running score went negative: close the current run and start a new one at the next column
+ {
+// reporterr( "## resetting, start = %d, maxstart = %d\n", start, maxstart );
+ if( start == maxstart )
+ {
+ maxend = pt1 - s1;
+// reporterr( "maxend = %d\n", maxend );
+ }
+ score = 0.0;
+ start = pt1 - s1;
+ }
+ }
+ if( start == maxstart ) // the best run is the one still open at the end of the string
+ maxend = pt1 - s1 - 1;
+
+// reporterr( "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
+ s1[maxend+1] = 0; // both inputs are truncated in place just after maxend
+ s2[maxend+1] = 0;
+ return( maxstart );
+}
+
+void resetlocalhom( int nseq, LocalHom **lh ) // Set opt to 1.0 on every LocalHom node of each pair list lh[i]+j with i<j.
+{
+ int i, j;
+ LocalHom *pt; // cursor over the linked list that starts at lh[i]+j
+
+ for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ ) // only pairs with i<j are visited
+ {
+ for( pt=lh[i]+j; pt; pt=pt->next )
+ pt->opt = 1.0;
+ }
+
+}
+
+void gapireru( char *res, char *ori, char *gt ) // Build res by walking template gt: a '-' in gt emits the gap character, any other character consumes the next character of ori.
+{
+ char g;
+ while( (g = *gt++) ) // stops at the terminating NUL of gt
+ {
+ if( g == '-' )
+ {
+ *res++ = *newgapstr; // first character of newgapstr (defined outside this block)
+ }
+ else
+ {
+ *res++ = *ori++;
+ }
+ }
+ *res = 0; // NUL-terminate the output
+}
+
+void getkyokaigap( char *g, char **s, int pos, int n ) // Copy the character at column pos of each of the n strings in s into g[0..n-1]; no NUL terminator is written here.
+{
+// char *bk = g;
+// while( n-- ) *g++ = '-';
+ while( n-- ) *g++ = (*s++)[pos];
+
+// reporterr( "bk = %s\n", bk );
+}
+
+void new_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len, char *sgappat ) // For each column, sum eff[j] over the sequences in which a gap opens there (previous column not a gap, this column '-').
+#if 0
+{
+ int i, j, gc, gb;
+ double feff;
+
+
+ for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ gc = ( sgappat[j] == '-' );
+ for( i=0; i<len; i++ )
+ {
+ gb = gc;
+ gc = ( seq[j][i] == '-' );
+ if( !gb * gc ) ogcp[i] += feff;
+ }
+ }
+}
+#else
+{
+ int i, j, gc, gb; // gb: previous column was a gap; gc: current column is a gap
+ double feff;
+ double *fpt;
+ char *spt;
+
+ fpt = ogcp;
+ i = len; // clears ogcp[0..len-1]; the disabled branch above cleared len+1 entries
+ while( i-- ) *fpt++ = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ spt = seq[j];
+ fpt = ogcp;
+ gc = ( sgappat[j] == '-' ); // gap state to the left of column 0 is taken from sgappat
+ i = len;
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *spt++ == '-' );
+ {
+ if( !gb * gc ) *fpt += feff; // (!gb) * gc is non-zero only when a gap starts at this column
+ fpt++;
+ }
+ }
+ }
+}
+#endif
+void new_OpeningGapCount_zure( double *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat ) // Like new_OpeningGapCount, with one extra slot ogcp[len] for a gap opening in the boundary pattern egappat.
+#if 0
+{
+ int i, j, gc, gb;
+ double feff;
+
+
+ for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ gc = ( sgappat[j] == '-' );
+ for( i=0; i<len; i++ )
+ {
+ gb = gc;
+ gc = ( seq[j][i] == '-' );
+ if( !gb * gc ) ogcp[i] += feff;
+ }
+ {
+ gb = gc;
+ gc = ( egappat[j] == '-' );
+ if( !gb * gc ) ogcp[i] += feff;
+ }
+ }
+}
+#else
+{
+ int i, j, gc, gb; // gb: previous column was a gap; gc: current column is a gap
+ double feff;
+ double *fpt;
+ char *spt;
+
+ fpt = ogcp;
+ i = len+2; // clears ogcp[0..len+1]
+ while( i-- ) *fpt++ = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ spt = seq[j];
+ fpt = ogcp;
+ gc = ( sgappat[j] == '-' ); // gap state to the left of column 0 is taken from sgappat
+ i = len;
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *spt++ == '-' );
+ {
+ if( !gb * gc ) *fpt += feff; // a gap starts at this column
+ fpt++;
+ }
+ }
+ {
+ gb = gc;
+ gc = ( egappat[j] == '-' ); // state just past the last column is taken from egappat
+ if( !gb * gc ) *fpt += feff; // fpt now points at ogcp[len]
+ }
+ }
+}
+#endif
+
+void new_FinalGapCount_zure( double *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat ) // For each column, sum eff[j] over the sequences in which a gap has just ended (previous column '-', this column not); slot len uses egappat.
+#if 0
+{
+ int i, j, gc, gb;
+ double feff;
+
+ for( i=0; i<len+1; i++ ) fgcp[i] = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ gc = ( sgappat[j] == '-' );
+ for( i=0; i<len; i++ )
+ {
+ gb = gc;
+ gc = ( seq[j][i] == '-' );
+ {
+ if( gb * !gc ) fgcp[i] += feff;
+ }
+ }
+ {
+ gb = gc;
+ gc = ( egappat[j] == '-' );
+ {
+ if( gb * !gc ) fgcp[len] += feff;
+ }
+ }
+ }
+}
+#else
+{
+ int i, j, gc, gb; // gb: previous column was a gap; gc: current column is a gap
+ double feff;
+ double *fpt;
+ char *spt;
+
+ fpt = fgcp;
+ i = len+2; // clears fgcp[0..len+1]
+ while( i-- ) *fpt++ = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ fpt = fgcp;
+ spt = seq[j];
+ gc = ( sgappat[j] == '-' ); // gap state to the left of column 0 is taken from sgappat
+ i = len;
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *spt++ == '-' );
+ {
+ if( gb * !gc ) *fpt += feff; // previous column was a gap and this one is not
+ fpt++;
+ }
+ }
+ {
+ gb = gc;
+ gc = ( egappat[j] == '-' ); // state just past the last column is taken from egappat
+ {
+ if( gb * !gc ) *fpt += feff; // fpt now points at fgcp[len]
+ }
+ }
+ }
+}
+#endif
+void new_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len, char *egappat ) // For each column i, sum eff[j] over the sequences where column i is '-' and the following character is not.
+#if 0
+{
+ int i, j, gc, gb;
+ double feff;
+
+ for( i=0; i<len; i++ ) fgcp[i] = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ gc = ( seq[j][0] == '-' );
+ for( i=1; i<len; i++ )
+ {
+ gb = gc;
+ gc = ( seq[j][i] == '-' );
+ {
+ if( gb * !gc ) fgcp[i-1] += feff;
+ }
+ }
+ {
+ gb = gc;
+ gc = ( egappat[j] == '-' );
+ {
+ if( gb * !gc ) fgcp[len-1] += feff;
+ }
+ }
+ }
+}
+#else
+{
+ int i, j, gc, gb; // gb: column i is a gap; gc: the character after column i is a gap
+ double feff;
+ double *fpt;
+ char *spt;
+
+ fpt = fgcp;
+ i = len; // clears fgcp[0..len-1] only
+ while( i-- ) *fpt++ = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ fpt = fgcp;
+ spt = seq[j];
+ gc = ( *spt == '-' );
+ i = len;
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *++spt == '-' ); // looks one character ahead; the last iteration reads seq[j][len]
+ {
+ if( gb * !gc ) *fpt += feff;
+ fpt++;
+ }
+ }
+ {
+ gb = gc;
+ gc = ( egappat[j] == '-' );
+ {
+ if( gb * !gc ) *fpt += feff; // fpt points at fgcp[len], which the clearing loop above does not touch
+ }
+ }
+ }
+}
+#endif
+
+void st_OpeningGapAdd( double *ogcp, int clus, char **seq, double *eff, int len ) // Update existing gap-open counts for the last member only: scale each ogcp[i] by 1-eff[clus-1], then add eff[clus-1] where that sequence opens a gap.
+{
+ int i, j, gc, gb;
+ double *fpt;
+ char *spt;
+ int newmem = clus-1; // index of the member being added
+ double neweff = eff[newmem];
+ double orieff = 1.0 - neweff; // factor applied to the values already stored in ogcp
+ double feff;
+
+// fpt = ogcp;
+// i = len;
+// while( i-- ) *fpt++ = 0.0;
+
+ j = clus-1;
+// for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ spt = seq[j];
+ fpt = ogcp;
+ i = len;
+ gc = 0; // the position before column 0 is treated as non-gap
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *spt++ == '-' );
+ *fpt *= orieff;
+ if( !gb * gc ) *fpt += feff; // a gap starts at this column
+ fpt++;
+ }
+ }
+ ogcp[len] = 0.0;
+
+#if 0
+ for( i=0; i<len; i++ )
+ reporterr( "ogcp[%d]=%f\n", i, ogcp[i] );
+ for( i=0; i<clus; i++ )
+ reporterr( "%s\n", seq[i] );
+ exit( 1 );
+#endif
+}
+
+void st_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len ) // For each column, sum eff[j] over the sequences in which a gap opens there; ogcp[len] is set to 0.0.
+{
+ int i, j, gc, gb; // gb: previous column was a gap; gc: current column is a gap
+ double feff;
+ double *fpt;
+ char *spt;
+
+ fpt = ogcp;
+ i = len;
+ while( i-- ) *fpt++ = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ spt = seq[j];
+ fpt = ogcp;
+ gc = 0; // the position before column 0 is treated as non-gap
+// gc = 1;
+ i = len;
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *spt++ == '-' );
+ {
+ if( !gb * gc ) *fpt += feff; // a gap starts at this column
+ fpt++;
+ }
+ }
+ }
+ ogcp[len] = 0.0;
+}
+
+void st_FinalGapCount_zure( double *fgcp, int clus, char **seq, double *eff, int len ) // Gap-end counts stored one slot to the right: fgcp[i+1] collects eff[j] where column i is '-' and the next character is not.
+{
+ int i, j, gc, gb; // gb: column i is a gap; gc: the character after column i is a gap
+ double feff;
+ double *fpt;
+ char *spt;
+
+ fpt = fgcp;
+ i = len+1; // clears fgcp[0..len]
+ while( i-- ) *fpt++ = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ fpt = fgcp+1; // writing starts at slot 1, so fgcp[0] keeps the 0.0 set above
+ spt = seq[j];
+ gc = ( *spt == '-' );
+ i = len;
+// for( i=1; i<len; i++ )
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *++spt == '-' ); // looks one character ahead; the last iteration reads seq[j][len]
+ {
+ if( gb * !gc ) *fpt += feff;
+ fpt++;
+ }
+ }
+ {
+ gb = gc;
+ gc = 0; // the position past the end is treated as non-gap
+// gc = 1;
+ {
+ if( gb * !gc ) *fpt += feff; // fpt points at fgcp[len+1] here
+ }
+ }
+ }
+}
+
+void st_FinalGapAdd( double *fgcp, int clus, char **seq, double *eff, int len ) // Update existing gap-end counts for the last member only: scale each fgcp[i] by 1-eff[clus-1], then add eff[clus-1] where that sequence ends a gap.
+{
+ int i, j, gc, gb;
+ double *fpt;
+ char *spt;
+ int newmem = clus-1; // index of the member being added
+ double neweff = eff[newmem];
+ double orieff = 1.0 - neweff; // factor applied to the values already stored in fgcp
+ double feff;
+
+// fpt = fgcp;
+// i = len;
+// while( i-- ) *fpt++ = 0.0;
+
+ j = clus-1;
+// for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ fpt = fgcp;
+ spt = seq[j];
+ gc = ( *spt == '-' );
+ i = len;
+// for( i=1; i<len; i++ )
+ while( i-- )
+ {
+ *fpt *= orieff;
+ gb = gc;
+ gc = ( *++spt == '-' ); // looks one character ahead; the last iteration reads seq[j][len]
+ {
+ if( gb * !gc ) *fpt += feff; // column is a gap and the next character is not
+ fpt++;
+ }
+ }
+ {
+ *fpt *= orieff; // fpt points at fgcp[len] here
+ gb = gc;
+ gc = 0; // the position past the end is treated as non-gap
+// gc = 1;
+ {
+ if( gb * !gc ) *fpt += feff;
+ }
+ }
+ }
+}
+
+void st_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len ) // For each column i, sum eff[j] over the sequences where column i is '-' and the following character is not.
+{
+ int i, j, gc, gb; // gb: column i is a gap; gc: the character after column i is a gap
+ double feff;
+ double *fpt;
+ char *spt;
+
+ fpt = fgcp;
+ i = len; // clears fgcp[0..len-1] only
+ while( i-- ) *fpt++ = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ fpt = fgcp;
+ spt = seq[j];
+ gc = ( *spt == '-' );
+ i = len;
+// for( i=1; i<len; i++ )
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *++spt == '-' ); // looks one character ahead; the last iteration reads seq[j][len]
+ {
+ if( gb * !gc ) *fpt += feff;
+ fpt++;
+ }
+ }
+ {
+ gb = gc;
+ gc = 0; // the position past the end is treated as non-gap
+// gc = 1;
+ {
+ if( gb * !gc ) *fpt += feff; // fpt points at fgcp[len], which the clearing loop above does not touch
+ }
+ }
+ }
+}
+
+void getGapPattern( double *fgcp, int clus, char **seq, double *eff, int len, char *xxx ) // Compute gap-end counts into fgcp[0..len] and print fgcp[0..len-1] through reporterr; the xxx argument is not used.
+{
+ int i, j, gc, gb; // gb: column i is a gap; gc: the character after column i is a gap
+ double feff;
+ double *fpt;
+ char *spt;
+
+ fpt = fgcp;
+ i = len+1; // clears fgcp[0..len]
+ while( i-- ) *fpt++ = 0.0;
+ for( j=0; j<clus; j++ )
+ {
+ feff = (double)eff[j];
+ fpt = fgcp;
+ spt = seq[j];
+ gc = ( *spt == '-' );
+ i = len+1; // NOTE(review): with *++spt below this reads up to seq[j][len+1]; confirm the buffers are at least that long
+ while( i-- )
+ {
+ gb = gc;
+ gc = ( *++spt == '-' );
+ {
+ if( gb * !gc ) *fpt += feff;
+ fpt++;
+ }
+ }
+#if 0
+ {
+ gb = gc;
+ gc = ( egappat[j] == '-' );
+ {
+ if( gb * !gc ) *fpt += feff;
+ }
+ }
+#endif
+ }
+ for( j=0; j<len; j++ )
+ {
+ reporterr( "%d, %f\n", j, fgcp[j] );
+ }
+}
+
+void getdigapfreq_st( double *freq, int clus, char **seq, double *eff, int len ) // freq[j] (1<=j<len) = sum of eff[i] over sequences with '-' at both column j-1 and column j; freq[0] and freq[len] stay 0.0.
+{
+ int i, j;
+ double feff;
+ for( i=0; i<len+1; i++ ) freq[i] = 0.0;
+ for( i=0; i<clus; i++ )
+ {
+ feff = eff[i];
+ if( 0 && seq[i][0] == '-' ) // disabled by the constant 0 (original note: may be a mistake)
+ freq[0] += feff;
+ for( j=1; j<len; j++ )
+ {
+ if( seq[i][j] == '-' && seq[i][j-1] == '-' )
+ freq[j] += feff;
+ }
+ if( 0 && seq[i][len-1] == '-' ) // disabled by the constant 0
+ freq[len] += feff;
+ }
+// reporterr( "\ndigapf = \n" );
+// for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
+}
+
+void getdiaminofreq_x( double *freq, int clus, char **seq, double *eff, int len ) // Weighted count of adjacent non-gap pairs per column; the two ends look only at the first and last column.
+{
+ int i, j;
+ double feff;
+ for( i=0; i<len+2; i++ ) freq[i] = 0.0; // clears freq[0..len+1]
+ for( i=0; i<clus; i++ )
+ {
+ feff = eff[i];
+ if( seq[i][0] != '-' ) // correct
+ freq[0] += feff;
+ for( j=1; j<len; j++ )
+ {
+ if( seq[i][j] != '-' && seq[i][j-1] != '-' )
+ freq[j] += feff;
+ }
+ if( 1 && seq[i][len-1] != '-' ) // when xxx is not used, only [len-1] is checked
+ freq[len] += feff;
+ }
+// reporterr( "\ndiaaf = \n" );
+// for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
+}
+
+void getdiaminofreq_st( double *freq, int clus, char **seq, double *eff, int len ) // Weighted count of adjacent non-gap pairs per column; freq[len] receives every sequence's weight unconditionally.
+{
+ int i, j;
+ double feff;
+ for( i=0; i<len+1; i++ ) freq[i] = 0.0;
+ for( i=0; i<clus; i++ )
+ {
+ feff = eff[i];
+ if( seq[i][0] != '-' )
+ freq[0] += feff;
+ for( j=1; j<len; j++ )
+ {
+ if( seq[i][j] != '-' && seq[i][j-1] != '-' )
+ freq[j] += feff;
+ }
+// if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
+ freq[len] += feff; // the guarding condition above is commented out, so this always runs
+ }
+// reporterr( "\ndiaaf = \n" );
+// for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
+}
+
+void getdigapfreq_part( double *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat ) // Weighted count of adjacent gap pairs per column; the two ends pair the first/last column with sgappat/egappat.
+{
+ int i, j;
+ double feff;
+ for( i=0; i<len+2; i++ ) freq[i] = 0.0; // clears freq[0..len+1]
+ for( i=0; i<clus; i++ )
+ {
+ feff = eff[i];
+// if( seq[i][0] == '-' )
+ if( seq[i][0] == '-' && sgappat[i] == '-' ) // left end: column 0 paired with sgappat
+ freq[0] += feff;
+ for( j=1; j<len; j++ )
+ {
+ if( seq[i][j] == '-' && seq[i][j-1] == '-' )
+ freq[j] += feff;
+ }
+// if( seq[i][len] == '-' && seq[i][len-1] == '-' ) // xxx wo tsukawanaitoki arienai
+ if( egappat[i] == '-' && seq[i][len-1] == '-' ) // right end: column len-1 paired with egappat
+ freq[len] += feff;
+ }
+// reporterr( "\ndigapf = \n" );
+// for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
+}
+
+void getdiaminofreq_part( double *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat ) // Weighted count of adjacent non-gap pairs per column; the two ends pair the first/last column with sgappat/egappat.
+{
+ int i, j;
+ double feff;
+ for( i=0; i<len+2; i++ ) freq[i] = 0.0; // clears freq[0..len+1]
+ for( i=0; i<clus; i++ )
+ {
+ feff = eff[i];
+ if( seq[i][0] != '-' && sgappat[i] != '-' ) // left end: column 0 paired with sgappat
+ freq[0] += feff;
+ for( j=1; j<len; j++ )
+ {
+ if( seq[i][j] != '-' && seq[i][j-1] != '-' )
+ freq[j] += feff;
+ }
+// if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
+ if( egappat[i] != '-' && seq[i][len-1] != '-' ) // right end: column len-1 paired with egappat
+ freq[len] += feff;
+ }
+// reporterr( "\ndiaaf = \n" );
+// for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
+}
+
+void getgapfreq_zure_part( double *freq, int clus, char **seq, double *eff, int len, char *sgap ) // Weighted gap frequency shifted by one: freq[0] comes from sgap, freq[j+1] from column j.
+{
+ int i, j;
+ double feff;
+ for( i=0; i<len+2; i++ ) freq[i] = 0.0; // clears freq[0..len+1]
+ for( i=0; i<clus; i++ )
+ {
+ feff = eff[i];
+ if( sgap[i] == '-' )
+ freq[0] += feff;
+ for( j=0; j<len; j++ )
+ {
+ if( seq[i][j] == '-' )
+ freq[j+1] += feff; // stored one slot to the right of the column index
+ }
+// if( egap[i] == '-' )
+// freq[len+1] += feff;
+ }
+// reporterr( "\ngapf = \n" );
+// for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
+}
+
+void getgapfreq_zure( double *freq, int clus, char **seq, double *eff, int len ) // Weighted gap frequency shifted by one: freq[j+1] is the sum of eff[i] over sequences with '-' at column j.
+{
+ int i, j;
+ double feff;
+ for( i=0; i<len+1; i++ ) freq[i] = 0.0; // clears freq[0..len]
+ for( i=0; i<clus; i++ )
+ {
+ feff = eff[i];
+ for( j=0; j<len; j++ )
+ {
+ if( seq[i][j] == '-' )
+ freq[j+1] += feff; // stored one slot to the right of the column index
+ }
+ }
+ freq[len+1] = 0.0; // NOTE(review): index len+1 is beyond the len+1 entries cleared above; confirm freq holds at least len+2 entries
+// reporterr( "\ngapf = \n" );
+// for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
+}
+
+void getgapfreq( double *freq, int clus, char **seq, double *eff, int len ) // freq[j] = sum of eff[i] over sequences with '-' at column j; freq[len] is 0.0.
+{
+ int i, j;
+ double feff;
+ for( i=0; i<len+1; i++ ) freq[i] = 0.0; // clears freq[0..len]
+ for( i=0; i<clus; i++ )
+ {
+ feff = eff[i];
+ for( j=0; j<len; j++ )
+ {
+ if( seq[i][j] == '-' )
+ freq[j] += feff;
+ }
+ }
+ freq[len] = 0.0;
+// reporterr( "\ngapf = \n" );
+// for( i=0; i<len+1; i++ ) reporterr( "%5.3f ", freq[i] );
+}
+
+void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len ) // Rebuild pat[0..len]: for each position, a -1-terminated table of the lengths of the gap runs that end just before it, each with its summed weight.
+{
+ int i, j, k, gb, gc;
+ int known; // set to 1 when the current gap length already has an entry at this position
+ double feff;
+ Gappat **fpt;
+ char *spt;
+ int gaplen; // length of the gap run currently being scanned
+
+ fpt = pat;
+ i = len+1;
+ while( i-- ) // release any previous tables and reset all len+1 slots
+ {
+ if( *fpt ) free( *fpt );
+ *fpt++ = NULL;
+ }
+
+ for( j=0; j<clus; j++ )
+ {
+// reporterr( "seq[%d] = %s\n", j, seq[j] );
+ feff = (double)eff[j];
+
+ fpt = pat;
+ *fpt = NULL; // differs when called from Falign.c
+ spt = seq[j];
+ gc = 0;
+ gaplen = 0;
+
+ for( i=0; i<len+1; i++ )
+// while( i-- )
+ {
+// reporterr( "i=%d, gaplen = %d\n", i, gaplen );
+ gb = gc;
+ gc = ( i != len && *spt++ == '-' ); // position len is always treated as non-gap, closing any open run
+ if( gc )
+ gaplen++;
+ else
+ {
+ if( gb && gaplen ) // a gap run of gaplen columns ended just before position i
+ {
+ k = 1; // slot 0 is reserved for the total filled in below
+ known = 0;
+ if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
+ {
+ if( (*fpt)[k].len == gaplen )
+ {
+// reporterr( "known\n" );
+ known = 1;
+ break;
+ }
+ }
+
+ if( known == 0 )
+ {
+ *fpt = (Gappat *)realloc( *fpt, (k+3) * sizeof( Gappat ) ); // 1 before (total), 2 after (len0), terminator
+ if( !*fpt )
+ {
+ reporterr( "Cannot allocate gappattern!\n" );
+ reporterr( "Use an approximate method, with the --mafft5 option.\n" );
+ exit( 1 );
+ }
+ (*fpt)[k].freq = 0.0;
+ (*fpt)[k].len = gaplen;
+ (*fpt)[k+1].len = -1; // new end-of-table marker
+ (*fpt)[k+1].freq = 0.0; // unnecessary
+// reporterr( "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
+ }
+
+// reporterr( "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
+ (*fpt)[k].freq += feff;
+// reporterr( "%f\n", (*fpt)[k].freq );
+ gaplen = 0;
+ }
+ }
+ fpt++;
+ }
+ }
+#if 1
+ for( j=0; j<len+1; j++ )
+ {
+ if( pat[j] )
+ {
+// reporterr( "j=%d\n", j );
+// for( i=1; pat[j][i].len!=-1; i++ )
+// reporterr( "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
+
+ pat[j][0].len = 0; // not meaningful
+ pat[j][0].freq = 0.0;
+ for( i=1; pat[j][i].len!=-1;i++ )
+ {
+ pat[j][0].freq += pat[j][i].freq; // slot 0 accumulates the total weight of all gap entries
+// reporterr( "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
+ }
+// reporterr( "totaled, result = %f\n", pat[j][0].freq );
+
+ pat[j][i].freq = 1.0 - pat[j][0].freq; // remaining weight goes to the length-0 entry
+ pat[j][i].len = 0; // meaningful
+ pat[j][i+1].len = -1;
+ }
+ else
+ {
+ pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) ); // NOTE(review): the calloc result is not checked before use
+ pat[j][0].freq = 0.0;
+ pat[j][0].len = 0; // not meaningful
+
+ pat[j][1].freq = 1.0 - pat[j][0].freq;
+ pat[j][1].len = 0; // meaningful
+ pat[j][2].len = -1;
+ }
+ }
+#endif
+}
+
+static int minimum( int i1, int i2 ) // Function wrapper around the MIN macro.
+{
+ return MIN( i1, i2 );
+}
+
+static void commongappickpairfast( char *r1, char *r2, char *i1, char *i2, int *skip1, int *skip2 ) // Copy i1/i2 into r1/r2 column by column, using the skip1/skip2 tables to jump over columns instead of testing each one.
+{
+// char *i1bk = i1;
+ int skip, skipped1, skipped2; // skippedN: amount already subtracted from the current *skipN entry
+// int skip, skipped1, skipped2, scand1, scand2;
+ skipped1 = skipped2 = 0;
+// reporterr("\n");
+// while( *i1 )
+ while( 1 )
+ {
+// fprintf( stderr, "i1 pos =%d\n", (int)(i1- i1bk) );
+// reporterr( "\nSkip cand %d-%d\n", *skip1-skipped1, *skip2-skipped2 );
+#if 0
+ scand1 = *skip1-skipped1;
+ scand2 = *skip2-skipped2;
+ skip = MIN( scand1, scand2 );
+#else
+ skip = minimum( *skip1-skipped1, *skip2-skipped2 ); // presumably the run of columns that are gaps in both inputs - confirm against the code that fills skip1/skip2
+#endif
+// reporterr( "Skip %d\n", skip );
+ i1 += skip;
+ i2 += skip;
+ skipped1 += skip;
+ skipped2 += skip;
+// fprintf( stderr, "i1 pos =%d, nlenmax=%d\n", (int)(i1- i1bk), nlenmax );
+ if( !*i1 ) break; // end of input reached
+// reporterr( "%d, %c-%c\n", i1-i1bk, *i1, *i2 );
+// if( *i1 == '-' && *i2 == '-' ) // iranai?
+// {
+// reporterr( "Error in commongappickpairfast" );
+// exit( 1 );
+// i1++;
+// i2++;
+// }
+ if( *i1 != '-' )
+ {
+ skipped1 = 0;
+ skip1++; // move on to the next entry of the skip1 table
+ }
+ else skipped1++;
+
+ if( *i2 != '-' )
+ {
+ skipped2 = 0;
+ skip2++; // move on to the next entry of the skip2 table
+ }
+ else skipped2++;
+
+ *r1++ = *i1++;
+ *r2++ = *i2++;
+ }
+ *r1 = 0; // NUL-terminate both outputs
+ *r2 = 0;
+}
+
+static void commongappickpair( char *r1, char *r2, char *i1, char *i2 ) // Copy i1/i2 into r1/r2, dropping every column where both inputs have '-'.
+{
+// strcpy( r1, i1 );
+// strcpy( r2, i2 );
+// return; // not SP
+ while( *i1 ) // the loop is driven by i1 only; i2 is advanced in step with it
+ {
+ if( *i1 == '-' && *i2 == '-' )
+ {
+ i1++;
+ i2++;
+ }
+ else
+ {
+ *r1++ = *i1++;
+ *r2++ = *i2++;
+ }
+ }
+ *r1 = 0; // NUL-terminate both outputs
+ *r2 = 0;
+}
+
+double naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) // Sum over all pairs (i,j) of eff1[i]*eff2[j] times a per-column score: amino_dis plus half of a gap penalty chosen from the previous and current column states.
+{
+// return( 0 );
+ int i, j;
+ double val;
+ double valf; // score of the current sequence pair
+ int pv; // penalty applied at the current column
+ double deff;
+ char *p1, *p2, *p1p, *p2p; // p1/p2: current column; p1p/p2p: the column before it
+ val = 0.0;
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ {
+ deff = eff1[i] * eff2[j];
+// reporterr( "feff %d-%d = %f\n", i, j, feff );
+// reporterr( "i1 = %s\n", seq1[i] );
+// reporterr( "i2 = %s\n", seq2[j] );
+// reporterr( "s1 = %s\n", s1 );
+// reporterr( "s2 = %s\n", s2 );
+// reporterr( "penal = %d\n", penal );
+
+ valf = 0;
+ p1 = seq1[i]; p2 = seq2[j];
+ pv = 0;
+ if( *p1 == '-' && *p2 != '-' ) // first column: a gap on exactly one side costs penal
+ pv = penal;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal;
+// if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
+ p1p = p1; p2p = p2;
+ valf += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
+ while( *p1p ) // p1/p2 run one column ahead, so the last pass scores the terminating NUL column
+ {
+ pv = 0;
+ if( *p1p != '-' && *p2p != '-' ) // previous column: residue / residue
+ {
+ if( *p1 == '-' && *p2 != '-' )
+ pv = penal;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal;
+ if( *p1 != '-' && *p2 != '-' )
+ ;
+ if( *p1 == '-' && *p2 == '-' )
+ ;
}
- continue;
+ if( *p1p == '-' && *p2p == '-' ) // previous column: gap / gap
+ {
+ if( *p1 == '-' && *p2 != '-' )
+ pv = penal;
+// ;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal;
+// ;
+ if( *p1 != '-' && *p2 != '-' )
+ ;
+ if( *p1 == '-' && *p2 == '-' )
+ ;
+ }
+ if( *p1p != '-' && *p2p == '-' ) // previous column: residue / gap
+ {
+ if( *p1 == '-' && *p2 != '-' )
+ pv = penal * 2; // ??
+// ;
+ if( *p1 != '-' && *p2 == '-' )
+ ;
+ if( *p1 != '-' && *p2 != '-' )
+ pv = penal;
+// ;
+ if( *p1 == '-' && *p2 == '-' )
+ pv = penal;
+// ;
+ }
+ if( *p1p == '-' && *p2p != '-' ) // previous column: gap / residue
+ {
+ if( *p1 == '-' && *p2 != '-' )
+ ;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal * 2; // ??
+// ;
+ if( *p1 != '-' && *p2 != '-' )
+ pv = penal;
+// ;
+ if( *p1 == '-' && *p2 == '-' )
+ pv = penal;
+// ;
+ }
+// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
+// if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
+ valf += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
+ p1p++; p2p++;
}
+// reporterr( "valf = %d\n", valf );
+ val += deff * ( valf );
+ }
+ reporterr( "val = %f\n", val ); // the total is also written out through reporterr
+ return( val );
+// exit( 1 );
+}
+double naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) // Currently always returns 0: the early return below makes the rest of the body unreachable.
+{
+ int i, j;
+ double val;
+ double valf;
+ int pv;
+ double deff;
+ char *p1, *p2, *p1p, *p2p;
+ return( 0 ); // everything after this statement is dead code
+ val = 0.0;
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ {
+ deff = eff1[i] * eff2[j];
+// reporterr( "feff %d-%d = %f\n", i, j, feff );
+// reporterr( "i1 = %s\n", seq1[i] );
+// reporterr( "i2 = %s\n", seq2[j] );
+// reporterr( "s1 = %s\n", s1 );
+// reporterr( "s2 = %s\n", s2 );
+// reporterr( "penal = %d\n", penal );
- score += ( amino_dis[(int)*pt1++][(int)*pt2++] - thr );
-// score += ( amino_dis[(int)*pt1++][(int)*pt2++] );
- if( score > maxscore )
+ valf = 0;
+ p1 = seq1[i]; p2 = seq2[j];
+ pv = 0;
+ if( *p1 == '-' && *p2 != '-' )
+ pv = penal;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal;
+// if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
+ p1p = p1; p2p = p2;
+ valf += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
+ while( *p1p )
{
-// fprintf( stderr, "score = %f\n", score );
- maxscore = score;
- maxstart = start;
-// fprintf( stderr, "## max! maxstart = %d, start = %d\n", maxstart, start );
+ pv = 0;
+ if( *p1p != '-' && *p2p != '-' ) // previous column: residue / residue
+ {
+ if( *p1 == '-' && *p2 != '-' )
+ pv = penal;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal;
+ if( *p1 != '-' && *p2 != '-' )
+ ;
+ if( *p1 == '-' && *p2 == '-' )
+ ;
+ }
+ if( *p1p == '-' && *p2p == '-' ) // previous column: gap / gap (no penalty in any case here)
+ {
+ if( *p1 == '-' && *p2 != '-' )
+// pv = penal;
+ ;
+ if( *p1 != '-' && *p2 == '-' )
+// pv = penal;
+ ;
+ if( *p1 != '-' && *p2 != '-' )
+ ;
+ if( *p1 == '-' && *p2 == '-' )
+ ;
+ }
+ if( *p1p != '-' && *p2p == '-' ) // previous column: residue / gap
+ {
+ if( *p1 == '-' && *p2 != '-' )
+ pv = penal * 2; // ??
+// ;
+ if( *p1 != '-' && *p2 == '-' )
+ ;
+ if( *p1 != '-' && *p2 != '-' )
+ pv = penal;
+// ;
+ if( *p1 == '-' && *p2 == '-' )
+// pv = penal;
+ ;
+ }
+ if( *p1p == '-' && *p2p != '-' ) // previous column: gap / residue
+ {
+ if( *p1 == '-' && *p2 != '-' )
+ ;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal * 2; // ??
+// ;
+ if( *p1 != '-' && *p2 != '-' )
+ pv = penal;
+// ;
+ if( *p1 == '-' && *p2 == '-' )
+// pv = penal;
+ ;
+ }
+// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
+// if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
+ valf += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
+ p1p++; p2p++;
}
- if( score < 0.0 )
+// reporterr( "valf = %d\n", valf );
+ val += deff * ( valf );
+ }
+ reporterr( "val = %f\n", val );
+ return( val );
+// exit( 1 );
+}
+double naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) // Same pairwise scan as the R/Q variants, but past column 0 a penalty is charged only when a one-sided gap is followed by a residue/residue column, or the reverse.
+{
+ int i, j;
+ double val;
+ double valf; // score of the current sequence pair
+ int pv; // penalty applied at the current column
+// double feff = 0.0; // by D.Mathog, a guess
+ double deff;
+ char *p1, *p2, *p1p, *p2p; // p1/p2: current column; p1p/p2p: the column before it
+ val = 0.0;
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ {
+ deff = eff1[i] * eff2[j];
+// reporterr( "i1 = %s\n", seq1[i] );
+// reporterr( "i2 = %s\n", seq2[j] );
+// reporterr( "s1 = %s\n", s1 );
+// reporterr( "s2 = %s\n", s2 );
+// reporterr( "penal = %d\n", penal );
+
+ valf = 0;
+ p1 = seq1[i]; p2 = seq2[j];
+ pv = 0;
+ if( *p1 == '-' && *p2 != '-' ) // first column: a gap on exactly one side costs penal
+ pv = penal;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal;
+ if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) ); // unlike the R/Q variants, this diagnostic is active
+ p1p = p1; p2p = p2;
+ valf += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
+ while( *p1p ) // p1/p2 run one column ahead, so the last pass scores the terminating NUL column
{
-// fprintf( stderr, "## resetting, start = %d, maxstart = %d\n", start, maxstart );
- if( start == maxstart )
+ pv = 0;
+ if( *p1p != '-' && *p2p != '-' ) // previous column: residue / residue
{
- maxend = pt1 - s1;
-// fprintf( stderr, "maxend = %d\n", maxend );
+ if( *p1 == '-' && *p2 != '-' )
+ pv = penal;
+ if( *p1 != '-' && *p2 == '-' )
+ pv = penal;
+ if( *p1 != '-' && *p2 != '-' )
+ ;
+ if( *p1 == '-' && *p2 == '-' )
+ ;
}
- score = 0.0;
- start = pt1 - s1;
+ if( *p1p == '-' && *p2p == '-' ) // previous column: gap / gap (no penalty in any case here)
+ {
+ if( *p1 == '-' && *p2 != '-' )
+// pv = penal;
+ ;
+ if( *p1 != '-' && *p2 == '-' )
+// pv = penal;
+ ;
+ if( *p1 != '-' && *p2 != '-' )
+ ;
+ if( *p1 == '-' && *p2 == '-' )
+ ;
+ }
+ if( *p1p != '-' && *p2p == '-' ) // previous column: residue / gap
+ {
+ if( *p1 == '-' && *p2 != '-' )
+// pv = penal;
+ ;
+ if( *p1 != '-' && *p2 == '-' )
+ ;
+ if( *p1 != '-' && *p2 != '-' )
+ pv = penal;
+ if( *p1 == '-' && *p2 == '-' )
+// pv = penal;
+ ;
+ }
+ if( *p1p == '-' && *p2p != '-' ) // previous column: gap / residue
+ {
+ if( *p1 == '-' && *p2 != '-' )
+ ;
+ if( *p1 != '-' && *p2 == '-' )
+// pv = penal;
+ ;
+ if( *p1 != '-' && *p2 != '-' )
+ pv = penal;
+ if( *p1 == '-' && *p2 == '-' )
+// pv = penal;
+ ;
+ }
+// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
+// if( pv ) reporterr( "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
+ valf += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++] + 0.5 * pv;
+ p1p++; p2p++;
}
+// reporterr( "valf = %d\n", valf );
+ val += deff * ( valf );
}
- if( start == maxstart )
- maxend = pt1 - s1 - 1;
-
-// fprintf( stderr, "maxstart = %d, maxend = %d, maxscore = %f\n", maxstart, maxend, maxscore );
- s1[maxend+1] = 0;
- s2[maxend+1] = 0;
- return( maxstart );
+ reporterr( "val = %f\n", val ); // the total is also written out through reporterr
+ return( val );
+// exit( 1 );
}
-void resetlocalhom( int nseq, LocalHom **lh )
+double naivepairscorefast( char *seq1, char *seq2, int *skip1, int *skip2, int penal ) // Score one aligned pair: compact it with commongappickpairfast, then add penal once per gap run and amino_dis for every other column.
{
- int i, j;
- LocalHom *pt;
+ double vali; // running score of the pair
+ int len = strlen( seq1 );
+ char *s1, *s2; // scratch copies that receive the compacted pair
+ char *p1, *p2;
- for( i=0; i<nseq-1; i++ ) for( j=i+1; j<nseq; j++ )
+ s1 = calloc( len+1, sizeof( char ) ); // NOTE(review): the calloc results are not checked before use
+ s2 = calloc( len+1, sizeof( char ) );
{
- for( pt=lh[i]+j; pt; pt=pt->next )
- pt->opt = 1.0;
- }
-
-}
+ vali = 0.0;
+ commongappickpairfast( s1, s2, seq1, seq2, skip1, skip2 );
+// commongappickpair( s1, s2, seq1, seq2 );
+// printf( "\n###s1 = %s\n", seq1 );
+// printf( "###s2 = %s\n", seq2 );
+// printf( "\n###i1 = %s\n", s1 );
+// printf( "###i2 = %s\n", s2 );
+// printf( "allocated size, len+1 = %d\n", len+1 );
+// printf( "###penal = %d\n", penal );
-void gapireru( char *res, char *ori, char *gt )
-{
- char g;
- while( (g = *gt++) )
- {
- if( g == '-' )
- {
- *res++ = *newgapstr;
- }
- else
+ p1 = s1; p2 = s2;
+ while( *p1 )
{
- *res++ = *ori++;
+ if( *p1 == '-' ) // gap run in the first sequence: one penalty, then skip the whole run
+ {
+// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
+ vali += (double)penal;
+// while( *p1 == '-' || *p2 == '-' )
+ while( *p1 == '-' ) // SP
+ {
+ p1++;
+ p2++;
+ }
+ continue;
+ }
+ if( *p2 == '-' ) // gap run in the second sequence: one penalty, then skip the whole run
+ {
+// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
+ vali += (double)penal;
+// while( *p2 == '-' || *p1 == '-' )
+ while( *p2 == '-' ) // SP
+ {
+ p1++;
+ p2++;
+ }
+ continue;
+ }
+// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
+ vali += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++]; // residue / residue column
}
}
- *res = 0;
+ free( s1 );
+ free( s2 );
+// reporterr( "###vali = %d\n", vali );
+ return( vali );
}
-void getkyokaigap( char *g, char **s, int pos, int n )
+double naivepairscore11_dynmtx( double **mtx, char *seq1, char *seq2, int penal ) // Score one aligned pair like naivepairscore11, but read substitution scores from mtx indexed through amino_n.
{
-// char *bk = g;
-// while( n-- ) *g++ = '-';
- while( n-- ) *g++ = (*s++)[pos];
-
-// fprintf( stderr, "bk = %s\n", bk );
-}
+ double vali; // running score of the pair
+ int len = strlen( seq1 );
+ char *s1, *s2, *p1, *p2; // s1/s2: scratch copies with common-gap columns removed
+ int c1, c2; // mtx indices obtained from amino_n
-void new_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat )
-#if 0
-{
- int i, j, gc, gb;
- float feff;
-
- for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
- for( j=0; j<clus; j++ )
+ s1 = calloc( len+1, sizeof( char ) ); // NOTE(review): the calloc results are not checked before use
+ s2 = calloc( len+1, sizeof( char ) );
{
- feff = (float)eff[j];
- gc = ( sgappat[j] == '-' );
- for( i=0; i<len; i++ )
+ vali = 0.0;
+ commongappickpair( s1, s2, seq1, seq2 );
+// reporterr( "###i1 = %s\n", s1 );
+// reporterr( "###i2 = %s\n", s2 );
+// reporterr( "###penal = %d\n", penal );
+
+ p1 = s1; p2 = s2;
+ while( *p1 )
{
- gb = gc;
- gc = ( seq[j][i] == '-' );
- if( !gb * gc ) ogcp[i] += feff;
+ if( *p1 == '-' ) // gap run in the first sequence: one penalty, then skip the whole run
+ {
+// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
+ vali += (double)penal;
+// while( *p1 == '-' || *p2 == '-' )
+ while( *p1 == '-' ) // SP
+ {
+ p1++;
+ p2++;
+ }
+ continue;
+ }
+ if( *p2 == '-' ) // gap run in the second sequence: one penalty, then skip the whole run
+ {
+// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
+ vali += (double)penal;
+// while( *p2 == '-' || *p1 == '-' )
+ while( *p2 == '-' ) // SP
+ {
+ p1++;
+ p2++;
+ }
+ continue;
+ }
+// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
+ c1 = amino_n[(unsigned char)*p1++];
+ c2 = amino_n[(unsigned char)*p2++];
+ vali += (double)mtx[c1][c2]; // residue / residue column scored from the supplied matrix
}
}
+ free( s1 );
+ free( s2 );
+// reporterr( "###vali = %d\n", vali );
+ return( vali );
}
-#else
+
+double naivepairscore11( char *seq1, char *seq2, int penal ) // Score one aligned pair: drop common-gap columns, then add penal once per gap run and amino_dis for every other column.
{
- int i, j, gc, gb;
- float feff;
- float *fpt;
- char *spt;
-
- fpt = ogcp;
- i = len;
- while( i-- ) *fpt++ = 0.0;
- for( j=0; j<clus; j++ )
+ double vali; // running score of the pair
+ int len = strlen( seq1 );
+ char *s1, *s2, *p1, *p2; // s1/s2: scratch copies with common-gap columns removed
+
+ s1 = calloc( len+1, sizeof( char ) ); // NOTE(review): the calloc results are not checked before use
+ s2 = calloc( len+1, sizeof( char ) );
{
- feff = (float)eff[j];
- spt = seq[j];
- fpt = ogcp;
- gc = ( sgappat[j] == '-' );
- i = len;
- while( i-- )
+ vali = 0.0;
+ commongappickpair( s1, s2, seq1, seq2 );
+// reporterr( "###i1 = %s\n", s1 );
+// reporterr( "###i2 = %s\n", s2 );
+// reporterr( "###penal = %d\n", penal );
+
+ p1 = s1; p2 = s2;
+ while( *p1 )
{
- gb = gc;
- gc = ( *spt++ == '-' );
+ if( *p1 == '-' ) // gap run in the first sequence: one penalty, then skip the whole run
{
- if( !gb * gc ) *fpt += feff;
- fpt++;
+// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
+ vali += (double)penal;
+// while( *p1 == '-' || *p2 == '-' )
+ while( *p1 == '-' ) // SP
+ {
+ p1++;
+ p2++;
+ }
+ continue;
+ }
+ if( *p2 == '-' ) // gap run in the second sequence: one penalty, then skip the whole run
+ {
+// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
+ vali += (double)penal;
+// while( *p2 == '-' || *p1 == '-' )
+ while( *p2 == '-' ) // SP
+ {
+ p1++;
+ p2++;
+ }
+ continue;
}
+// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
+ vali += (double)amino_dis[(unsigned char)*p1++][(unsigned char)*p2++]; // residue / residue column
}
}
+ free( s1 );
+ free( s2 );
+// reporterr( "###vali = %d\n", vali );
+ return( vali );
}
-#endif
-void new_OpeningGapCount_zure( float *ogcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
-#if 0
-{
- int i, j, gc, gb;
- float feff;
-
- for( i=0; i<len+1; i++ ) ogcp[i] = 0.0;
- for( j=0; j<clus; j++ )
+double naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal ) // Sum over all pairs (i,j) of eff1[i]*eff2[j] times the pair score: penal once per gap run plus amino_dis for every other column.
+{
+// return( 0.0 );
+ int i, j;
+ double val;
+ int vali; // integer score of the current pair
+ double feff;
+ int len = strlen( seq1[0] ); // scratch buffers are sized from the first sequence of group 1
+ char *s1, *s2, *p1, *p2;
+ s1 = calloc( len+1, sizeof( char ) ); // NOTE(review): the calloc results are not checked before use
+ s2 = calloc( len+1, sizeof( char ) );
+ val = 0.0;
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
{
- feff = (float)eff[j];
- gc = ( sgappat[j] == '-' );
- for( i=0; i<len; i++ )
- {
- gb = gc;
- gc = ( seq[j][i] == '-' );
- if( !gb * gc ) ogcp[i] += feff;
- }
+ vali = 0;
+ feff = eff1[i] * eff2[j];
+// reporterr( "feff %d-%d = %f\n", i, j, feff );
+ commongappickpair( s1, s2, seq1[i], seq2[j] ); // s1/s2 are reused for every pair
+// reporterr( "i1 = %s\n", seq1[i] );
+// reporterr( "i2 = %s\n", seq2[j] );
+// reporterr( "s1 = %s\n", s1 );
+// reporterr( "s2 = %s\n", s2 );
+// reporterr( "penal = %d\n", penal );
+
+ p1 = s1; p2 = s2;
+ while( *p1 )
{
- gb = gc;
- gc = ( egappat[j] == '-' );
- if( !gb * gc ) ogcp[i] += feff;
+ if( *p1 == '-' ) // gap run in the first sequence: one penalty, then skip the whole run
+ {
+// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
+ vali += penal;
+// while( *p1 == '-' || *p2 == '-' )
+ while( *p1 == '-' ) // SP
+ {
+ p1++;
+ p2++;
+ }
+ continue;
+ }
+ if( *p2 == '-' ) // gap run in the second sequence: one penalty, then skip the whole run
+ {
+// reporterr( "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
+ vali += penal;
+// while( *p2 == '-' || *p1 == '-' )
+ while( *p2 == '-' ) // SP
+ {
+ p1++;
+ p2++;
+ }
+ continue;
+ }
+// reporterr( "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
+ vali += amino_dis[(unsigned char)*p1++][(unsigned char)*p2++]; // residue / residue column
}
+// reporterr( "vali = %d\n", vali );
+ val += feff * vali;
}
+ free( s1 );
+ free( s2 );
+ reporterr( "val = %f\n", val ); // the total is also written out through reporterr
+ return( val );
+// exit( 1 );
}
-#else
+
+double plainscore( int nseq, char **s ) // Returns the sum of naivepairscore11( s[i], s[j], penalty ) over all pairs i<j of the nseq sequences; 'penalty' is not a parameter and comes from outside this function.
{
- int i, j, gc, gb;
- float feff;
- float *fpt;
- char *spt;
+ int i, j, ilim;
+ double v = 0.0;
- fpt = ogcp;
- i = len+2;
- while( i-- ) *fpt++ = 0.0;
- for( j=0; j<clus; j++ )
+ ilim = nseq-1;
+ for( i=0; i<ilim; i++ ) for( j=i+1; j<nseq; j++ )
{
- feff = (float)eff[j];
- spt = seq[j];
- fpt = ogcp;
- gc = ( sgappat[j] == '-' );
- i = len;
- while( i-- )
- {
- gb = gc;
- gc = ( *spt++ == '-' );
- {
- if( !gb * gc ) *fpt += feff;
- fpt++;
- }
- }
- {
- gb = gc;
- gc = ( egappat[j] == '-' );
- if( !gb * gc ) *fpt += feff;
- }
+ v += (double)naivepairscore11( s[i], s[j], penalty ); // each unordered pair is scored exactly once
}
+
+ reporterr( "penalty = %d\n", penalty ); // reports the penalty value that was used
+
+ return( v );
}
-#endif
-void new_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
-#if 0
+
+int addonetip( int njobc, int ***topolc, double **lenc, double **iscorec, int ***topol, double **len, Treedep *dep, int treeout, Addtree *addtree, int iadd, char **name, int *alnleninnode, int *nogaplen, int noalign ) // Copies the tree in topol/len (leaves 0..njobc-2) into topolc/lenc with sequence index njobc-1 attached as one extra tip, and returns the index stored in 'neighbor'. When treeout is set, addtree[iadd] is filled in as well.
{
- int i, j, gc, gb;
- float feff;
-
- for( i=0; i<len+1; i++ ) fgcp[i] = 0.0;
- for( j=0; j<clus; j++ )
+ int i, j, mem0, mem1, posinnew, m;
+ int nstep;
+ int norg;
+ double minscore, minscoreo, eff0, eff1, addedlen, tmpmin;
+ int nearest, nearesto;
+ int repnorg;
+ int *leaf2node;
+ int *additionaltopol;
+// double (*clusterfuncpt[1])(double,double);
+ Bchain *ac, *acpt, *acori, *acnext, *acprev;
+ int neighbor;
+ char *neighborlist;
+ char *npt;
+ int reflen, nearestnode, nogaplentoadd;
+ int *topoldum0 = NULL;
+ int *topoldum1 = NULL;
+ int *topolo0;
+ int *topolo1;
+ int seqlengthcondition;
+ double sueff1_double_local = 1.0 - sueff_global; // weight of MIN( eff0, eff1 ) in the distance update inside the loop
+ double sueff05_double_local = sueff_global * 0.5; // weight of ( eff0 + eff1 ) in the distance update inside the loop
+// char **tree; //static?
+// char *treetmp; //static?
+
+// for( i=0; i<njobc; i++ ) reporterr( "nogaplen of %d = %d\n", i+1, nogaplen[i] );
+//exit( 1 );
+
+
+// treetmp = AllocateCharVec( njob*150 );
+// tree = AllocateCharMtx( njob, njob*150 );
+
+// sueff1_double = 1.0 - sueff_global;
+// sueff05_double = sueff_global * 0.5;
+// if ( treemethod == 'X' )
+// clusterfuncpt[0] = cluster_mix_double;
+// else if ( treemethod == 'E' )
+// clusterfuncpt[0] = cluster_average_double;
+// else if ( treemethod == 'q' )
+// clusterfuncpt[0] = cluster_minimum_double;
+// else
+// {
+// reporterr( "Unknown treemethod, %c\n", treemethod );
+// exit( 1 );
+// }
+
+ norg = njobc-1; // index of the sequence being added; indices 0..norg-1 are the existing leaves
+ nstep = njobc-2; // number of steps read from topol/len/dep
+
+ additionaltopol = (int *)calloc( 2, sizeof( int ) ); // -1-terminated member list holding only the new sequence (filled in below)
+ leaf2node= (int *)calloc( norg, sizeof( int ) ); // leaf index -> latest step of topol that contained it, or -1
+ if( treeout )
+ {
+ neighborlist = calloc( norg * 30, sizeof( char ) ); // text buffer for addtree[iadd].neighbors; allocated only when treeout is set
+ }
+// for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 );
+ if( !leaf2node )
+ {
+ reporterr( "Cannot allocate leaf2node.\n" );
+ exit( 1 );
+ }
+ additionaltopol[0] = norg;
+ additionaltopol[1] = -1;
+
+ ac = (Bchain *)malloc( norg * sizeof( Bchain ) ); // doubly linked list over leaf indices 0..norg-1; entries are unlinked in the loop below
+ for( i=0; i<norg; i++ )
{
- feff = (float)eff[j];
- gc = ( sgappat[j] == '-' );
- for( i=0; i<len; i++ )
- {
- gb = gc;
- gc = ( seq[j][i] == '-' );
- {
- if( gb * !gc ) fgcp[i] += feff;
- }
- }
+ ac[i].next = ac+i+1;
+ ac[i].prev = ac+i-1; // ac[0].prev is replaced by acori below
+ ac[i].pos = i;
+ }
+ ac[norg-1].next = NULL;
+
+
+ acori = (Bchain *)malloc( 1 * sizeof( Bchain ) ); // list head placed in front of ac[0]
+ acori->next = ac;
+ acori->pos = -1;
+ ac[0].prev = acori;
+
+
+// for( i=0; i<nstep; i++ )
+// {
+// reporterr( "distfromtip = %f\n", dep[i].distfromtip );
+// }
+//
+// for( i=0; i<norg; i++ )
+// {
+// reporterr( "disttofrag(%d,%d) = %f\n", i, njobc-1, iscorec[i][norg-i] );
+// }
+
+
+ minscore = 9999.9; // starting value for the minimum search
+ nearest = -1;
+ for( i=0; i<norg; i++ )
+ {
+ tmpmin = iscorec[i][norg-i]; // presumably the distance between leaf i and the new sequence (matrix indexed [i][j-i]) -- confirm against caller
+ if( minscore > tmpmin )
{
- gb = gc;
- gc = ( egappat[j] == '-' );
- {
- if( gb * !gc ) fgcp[len] += feff;
- }
+ minscore = tmpmin;
+ nearest = i;
}
}
-}
-#else
-{
- int i, j, gc, gb;
- float feff;
- float *fpt;
- char *spt;
-
- fpt = fgcp;
- i = len+2;
- while( i-- ) *fpt++ = 0.0;
- for( j=0; j<clus; j++ )
+ nearesto = nearest; // initial nearest leaf and distance, kept for addtree[iadd]
+ minscoreo = minscore;
+
+
+
+// for( i=0; i<njobc-1; i++ ) for( j=i+1; j<njobc; j++ )
+// reporterr( "iscorec[%d][%d] = %f\n", i, j, iscorec[i][j-i] );
+// reporterr( "nearest = %d\n", nearest+1 );
+// reporterr( "nearesto = %d\n", nearesto+1 );
+
+ posinnew = 0; // next step of topolc/lenc to be written
+ repnorg = -1; // stays -1 until the new sequence has been attached
+ nogaplentoadd = nogaplen[norg];
+
+
+
+ for( i=0; i<norg; i++ ) leaf2node[i] = -1;
+ for( i=0; i<nstep; i++ )
{
- feff = (float)eff[j];
- fpt = fgcp;
- spt = seq[j];
- gc = ( sgappat[j] == '-' );
- i = len;
- while( i-- )
+ mem0 = topol[i][0][0];
+ mem1 = topol[i][1][0];
+#if 0
+ reporterr( "\n\nstep %d (old) \n", i );
+
+ reporterr( "group0 = \n" );
+ for( j=0; topol[i][0][j]>-1; j++ )
{
- gb = gc;
- gc = ( *spt++ == '-' );
- {
- if( gb * !gc ) *fpt += feff;
- fpt++;
- }
+ reporterr( "%d ", topol[i][0][j]+1 );
}
+ reporterr( "\n" );
+ reporterr( "len=%f\n", len[i][0] );
+ reporterr( "group1 = \n" );
+ for( j=0; topol[i][1][j]>-1; j++ )
{
- gb = gc;
- gc = ( egappat[j] == '-' );
- {
- if( gb * !gc ) *fpt += feff;
- }
+ reporterr( "%d ", topol[i][1][j]+1 );
}
- }
-}
-#endif
-void new_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len, char *egappat )
-#if 0
-{
- int i, j, gc, gb;
- float feff;
-
- for( i=0; i<len; i++ ) fgcp[i] = 0.0;
- for( j=0; j<clus; j++ )
- {
- feff = (float)eff[j];
- gc = ( seq[j][0] == '-' );
- for( i=1; i<len; i++ )
+ reporterr( "\n" );
+ reporterr( "len=%f\n", len[i][1] );
+
+ reporterr( "\n\n\nminscore = %f ? %f\n", minscore, dep[i].distfromtip*2 );
+ reporterr( "i = %d\n", i );
+ if( leaf2node[nearest] == -1 )
{
- gb = gc;
- gc = ( seq[j][i] == '-' );
- {
- if( gb * !gc ) fgcp[i-1] += feff;
- }
+ reporterr( "nogaplen[nearest] = %d\n", nogaplen[nearest] );
}
+ else
{
- gb = gc;
- gc = ( egappat[j] == '-' );
- {
- if( gb * !gc ) fgcp[len-1] += feff;
- }
+ reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] );
+ reporterr( "leaf2node[nearest] = %d\n", leaf2node[nearest] );
}
- }
-}
-#else
-{
- int i, j, gc, gb;
- float feff;
- float *fpt;
- char *spt;
-
- fpt = fgcp;
- i = len;
- while( i-- ) *fpt++ = 0.0;
- for( j=0; j<clus; j++ )
- {
- feff = (float)eff[j];
- fpt = fgcp;
- spt = seq[j];
- gc = ( *spt == '-' );
- i = len;
- while( i-- )
+#endif
+ nearestnode = leaf2node[nearest]; // -1 while 'nearest' has not appeared in any earlier step
+ if( nearestnode == -1 )
+ reflen = nogaplen[nearest];
+ else
+ reflen = alnleninnode[nearestnode];
+// reflen = alnleninnode[i]; // BUG!!
+
+ if( noalign ) seqlengthcondition = 1;
+ else seqlengthcondition = ( nogaplentoadd <= reflen );
+
+//seqlengthcondition = 1; // CAUTION
+//seqlengthcondition = ( nogaplentoadd <= reflen ); // CAUTION
+
+ if( repnorg == -1 && dep[i].distfromtip * 2 > minscore && seqlengthcondition ) // The phylogenetic position is inaccurate.
+// if( repnorg == -1 && dep[i].distfromtip * 2 > minscore ) // Use this one when only the phylogenetic position is needed.
{
- gb = gc;
- gc = ( *++spt == '-' );
+// reporterr( "INSERT HERE, %d-%d\n", nearest, norg );
+// reporterr( "nearest = %d\n", nearest );
+// reporterr( "\n\n\nminscore = %f\n", minscore );
+// reporterr( "distfromtip *2 = %f\n", dep[i].distfromtip * 2 );
+// reporterr( "nearest=%d, leaf2node[]=%d\n", nearest, leaf2node[nearest] );
+
+ if( nearestnode == -1 )
{
- if( gb * !gc ) *fpt += feff;
- fpt++;
+// reporterr( "INSERTING to 0!!!\n" );
+// reporterr( "lastlength = %d\n", nogaplen[norg] );
+// reporterr( "reflength = %d\n", nogaplen[nearest] );
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( 1 + 1 ) * sizeof( int ) );
+ topolc[posinnew][0][0] = nearest; // group 0 of the inserted step is the single leaf 'nearest'
+ topolc[posinnew][0][1] = -1;
+
+ addedlen = lenc[posinnew][0] = minscore / 2;
+
}
- }
- {
- gb = gc;
- gc = ( egappat[j] == '-' );
+ else
{
- if( gb * !gc ) *fpt += feff;
+// reporterr( "INSERTING to g, leaf2node = %d, cm=%d!!!\n", leaf2node[nearest], countmem(topol[leaf2node[nearest]][0] ) );
+// reporterr( "alnleninnode[i] = %d\n", alnleninnode[i] );
+// reporterr( "alnleninnode[leaf2node[nearest]] = %d\n", alnleninnode[leaf2node[nearest]] );
+
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( ( countmem( topol[nearestnode][0] ) + countmem( topol[nearestnode][1] ) + 1 ) * sizeof( int ) ) );
+// reporterr( "leaf2node[%d] = %d\n", nearest, leaf2node[nearest] );
+ intcpy( topolc[posinnew][0], topol[nearestnode][0] ); // group 0 of the inserted step is both groups of step nearestnode
+ intcat( topolc[posinnew][0], topol[nearestnode][1] );
+// addedlen = lenc[posinnew][0] = minscore / 2 - len[nearestnode][0]; // bug!!
+ addedlen = lenc[posinnew][0] = dep[i].distfromtip - minscore / 2; // 2014/06/10
+// fprintf( stderr, "addedlen = %f, dep[i].distfromtip = %f, len[nearestnode][0] = %f, minscore/2 = %f, lenc[posinnew][0] = %f\n", addedlen, dep[i].distfromtip, len[nearestnode][0], minscore/2, lenc[posinnew][0] );
+
}
- }
- }
-}
+ neighbor = lastmem( topolc[posinnew][0] );
+
+ if( treeout )
+ {
+#if 0
+ fp = fopen( "infile.tree", "a" ); // contention!!
+ if( fp == 0 )
+ {
+ reporterr( "File error!\n" );
+ exit( 1 );
+ }
+ fprintf( fp, "\n" );
+ fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] );
+ fprintf( fp, " nearest sequence: %d\n", nearest + 1 );
+ fprintf( fp, " distance: %f\n", minscore );
+ fprintf( fp, " cousin: " );
+ for( j=0; topolc[posinnew][0][j]!=-1; j++ )
+ fprintf( fp, "%d ", topolc[posinnew][0][j]+1 );
+ fprintf( fp, "\n" );
+ fclose( fp );
+#else
+ addtree[iadd].nearest = nearesto;
+ addtree[iadd].dist1 = minscoreo;
+ addtree[iadd].dist2 = minscore;
+ neighborlist[0] = 0;
+ npt = neighborlist;
+ for( j=0; topolc[posinnew][0][j]!=-1; j++ )
+ {
+ sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); // 1-based member numbers separated by spaces
+ npt += strlen( npt );
+ }
+ addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) ); // copy sized to the text just written
+ strcpy( addtree[iadd].neighbors, neighborlist );
#endif
+ }
-void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )
-{
- int i, j, gc, gb;
- float feff;
- float *fpt;
- char *spt;
-
- fpt = ogcp;
- i = len;
- while( i-- ) *fpt++ = 0.0;
- for( j=0; j<clus; j++ )
- {
- feff = (float)eff[j];
- spt = seq[j];
- fpt = ogcp;
- gc = 0;
-// gc = 1;
- i = len;
- while( i-- )
- {
- gb = gc;
- gc = ( *spt++ == '-' );
+// reporterr( "INSERTING to 1!!!\n" );
+ topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( 1 + 1 ) * sizeof( int ) );
+ topolc[posinnew][1][0] = norg; // group 1 of the inserted step is the new sequence alone
+ topolc[posinnew][1][1] = -1;
+ lenc[posinnew][1] = minscore / 2;
+
+// reporterr( "STEP %d (newnew)\n", posinnew );
+// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 );
+// reporterr( "\n len=%f\n", lenc[posinnew][0] );
+// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 );
+// reporterr( "\n len=%f\n", lenc[posinnew][1] );
+
+ repnorg = nearest; // later steps whose group starts with repnorg also receive the new sequence
+
+// reporterr( "STEP %d\n", posinnew );
+// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] );
+// reporterr( "\n len=%f\n", lenc[i][0] );
+// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] );
+// reporterr( "\n len=%f\n", lenc[i][1] );
+
+// im = topolc[posinnew][0][0];
+// jm = topolc[posinnew][1][0];
+// sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] );
+// strcpy( tree[im], treetmp );
+
+ posinnew++;
+ }
+
+// reporterr( "minscore = %f\n", minscore );
+// reporterr( "distfromtip = %f\n", dep[i].distfromtip );
+// reporterr( "Modify matrix, %d-%d\n", nearest, norg );
+ eff0 = iscorec[mem0][norg-mem0];
+ eff1 = iscorec[mem1][norg-mem1];
+
+// iscorec[mem0][norg-mem0] = (clusterfuncpt[0])( eff0, eff1 );
+ iscorec[mem0][norg-mem0] = MIN( eff0, eff1 ) * sueff1_double_local + ( eff0 + eff1 ) * sueff05_double_local;
+ iscorec[mem1][norg-mem1] = 9999.9; // slightly wasteful
+
+ acprev = ac[mem1].prev; // unlink mem1 from the ac list
+ acnext = ac[mem1].next;
+ acprev->next = acnext;
+ if( acnext != NULL ) acnext->prev = acprev;
+
+ if( ( nearest == mem1 || nearest == mem0 ) ) // the entry for the current nearest leaf was just modified, so search again
+ {
+ minscore = 9999.9;
+// for( j=0; j<norg; j++ ) // slightly wasteful
+// {
+// if( minscore > iscorec[j][norg-j] )
+// {
+// minscore = iscorec[j][norg-j];
+// nearest = j;
+// }
+// }
+// reporterr( "searching on modified ac " );
+ for( acpt=acori->next; acpt!=NULL; acpt=acpt->next ) // slightly wasteful
{
- if( !gb * gc ) *fpt += feff;
- fpt++;
+// reporterr( "." );
+ j = acpt->pos;
+ tmpmin = iscorec[j][norg-j];
+ if( minscore > tmpmin )
+ {
+ minscore = tmpmin;
+ nearest = j;
+ }
}
+// reporterr( "done\n" );
}
- }
- ogcp[len] = 0.0;
-}
-void st_FinalGapCount_zure( float *fgcp, int clus, char **seq, double *eff, int len )
-{
- int i, j, gc, gb;
- float feff;
- float *fpt;
- char *spt;
-
- fpt = fgcp;
- i = len+1;
- while( i-- ) *fpt++ = 0.0;
- for( j=0; j<clus; j++ )
- {
- feff = (float)eff[j];
- fpt = fgcp+1;
- spt = seq[j];
- gc = ( *spt == '-' );
- i = len;
-// for( i=1; i<len; i++ )
- while( i-- )
+// reporterr( "posinnew = %d\n", posinnew );
+
+
+ if( topol[i][0][0] == repnorg ) // group 0 of this step contains the attachment point: append the new sequence to it
{
- gb = gc;
- gc = ( *++spt == '-' );
- {
- if( gb * !gc ) *fpt += feff;
- fpt++;
- }
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 2 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][0], topol[i][0] );
+ intcat( topolc[posinnew][0], additionaltopol );
+ lenc[posinnew][0] = len[i][0] - addedlen; // 2014/6/10
+// fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip );
+// fprintf( stderr, "addedlen=%f, len[i][0]=%f, lenc[][0]=%f\n", addedlen, len[i][0], lenc[posinnew][0] );
+// fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][0] );
+ addedlen = 0.0; // the correction is applied only once
}
+ else
{
- gb = gc;
- gc = 0;
-// gc = 1;
- {
- if( gb * !gc ) *fpt += feff;
- }
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + 1 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][0], topol[i][0] );
+ lenc[posinnew][0] = len[i][0];
}
- }
-}
-void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )
-{
- int i, j, gc, gb;
- float feff;
- float *fpt;
- char *spt;
-
- fpt = fgcp;
- i = len;
- while( i-- ) *fpt++ = 0.0;
- for( j=0; j<clus; j++ )
- {
- feff = (float)eff[j];
- fpt = fgcp;
- spt = seq[j];
- gc = ( *spt == '-' );
- i = len;
-// for( i=1; i<len; i++ )
- while( i-- )
+ if( topol[i][1][0] == repnorg ) // same for group 1
{
- gb = gc;
- gc = ( *++spt == '-' );
- {
- if( gb * !gc ) *fpt += feff;
- fpt++;
- }
+ topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 2 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][1], topol[i][1] );
+ intcat( topolc[posinnew][1], additionaltopol );
+ lenc[posinnew][1] = len[i][1] - addedlen; // 2014/6/10
+// fprintf( stderr, "i=%d, dep[i].distfromtip=%f\n", i, dep[i].distfromtip );
+// fprintf( stderr, "addedlen=%f, len[i][1]=%f, lenc[][1]=%f\n", addedlen, len[i][1], lenc[posinnew][1] );
+// fprintf( stderr, "lenc[][1] = %f\n", lenc[posinnew][1] );
+ addedlen = 0.0;
+
+ repnorg = topolc[posinnew][0][0]; // important
}
+ else
{
- gb = gc;
- gc = 0;
-// gc = 1;
- {
- if( gb * !gc ) *fpt += feff;
- }
+ topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], ( countmem( topol[i][1] ) + 1 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][1], topol[i][1] );
+ lenc[posinnew][1] = len[i][1];
}
+
+// reporterr( "\nSTEP %d (new)\n", posinnew );
+// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j]+1 );
+// reporterr( "\n len=%f\n", lenc[posinnew][0] );
+// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j]+1 );
+// reporterr( "\n len=%f\n", lenc[posinnew][1] );
+
+// reporterr("\ni=%d\n####### leaf2node[nearest]= %d\n", i, leaf2node[nearest] );
+
+ for( j=0; (m=topol[i][0][j])!=-1; j++ ) leaf2node[m] = i; // every member of this step now maps to step i
+ for( j=0; (m=topol[i][1][j])!=-1; j++ ) leaf2node[m] = i;
+
+// reporterr("####### leaf2node[nearest]= %d\n", leaf2node[nearest] );
+
+// im = topolc[posinnew][0][0];
+// jm = topolc[posinnew][1][0];
+// sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[posinnew][0], tree[jm], lenc[posinnew][1] );
+// strcpy( tree[im], treetmp );
+//
+// reporterr( "%s\n", treetmp );
+
+ posinnew++;
}
-}
-void getGapPattern( float *fgcp, int clus, char **seq, double *eff, int len, char *xxx )
-{
- int i, j, gc, gb;
- float feff;
- float *fpt;
- char *spt;
-
- fpt = fgcp;
- i = len+1;
- while( i-- ) *fpt++ = 0.0;
- for( j=0; j<clus; j++ )
+ if( nstep ) // step back to the last step of topol that was processed
{
- feff = (float)eff[j];
- fpt = fgcp;
- spt = seq[j];
- gc = ( *spt == '-' );
- i = len+1;
- while( i-- )
+ i--;
+ topolo0 = topol[i][0];
+ topolo1 = topol[i][1];
+ }
+ else
+ {
+// i = 0;
+// free( topol[i][0] );//?
+// free( topol[i][1] );//?
+// topol[i][0] = calloc( 2, sizeof( int ) );
+// topol[i][1] = calloc( 1, sizeof( int ) );
+// topol[i][0][0] = 0;
+// topol[i][0][1] = -1;
+// topol[i][1][0] = -1;
+
+ topoldum0 = calloc( 2, sizeof( int ) ); // nstep is 0: use the one-member group {0} and an empty group in place of topol
+ topoldum1 = calloc( 1, sizeof( int ) );
+ topoldum0[0] = 0;
+ topoldum0[1] = -1;
+ topoldum1[0] = -1;
+
+ topolo0 = topoldum0;
+ topolo1 = topoldum1;
+ }
+ if( repnorg == -1 ) // the new sequence was not attached inside the loop: join it to both groups of the last step
+ {
+// topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topol[i][0] ) + countmem( topol[i][1] ) + 1 ) * sizeof( int ) );
+// intcpy( topolc[posinnew][0], topol[i][0] );
+// intcat( topolc[posinnew][0], topol[i][1] );
+ topolc[posinnew][0] = (int *)realloc( topolc[posinnew][0], ( countmem( topolo0 ) + countmem( topolo1 ) + 1 ) * sizeof( int ) );
+ intcpy( topolc[posinnew][0], topolo0 );
+ intcat( topolc[posinnew][0], topolo1 );
+// lenc[posinnew][0] = len[i][0] + len[i][1] - minscore / 2; // BUG!! found on 2014/06/07
+ if( nstep )
+ lenc[posinnew][0] = minscore / 2 - dep[nstep-1].distfromtip; // only when nstep>0, 2014/11/21
+ else
+ lenc[posinnew][0] = minscore / 2;
+
+// reporterr( "\ndep[nstep-1].distfromtip = %f\n", dep[nstep-1].distfromtip );
+// reporterr( "lenc[][0] = %f\n", lenc[posinnew][0] );
+
+ topolc[posinnew][1] = (int *)realloc( topolc[posinnew][1], 2 * sizeof( int ) );
+ intcpy( topolc[posinnew][1], additionaltopol );
+ lenc[posinnew][1] = minscore / 2;
+
+// neighbor = lastmem( topolc[posinnew][0] );
+ neighbor = norg-1; // when there is no clear neighbor, report the last one
+
+ if( treeout )
{
- gb = gc;
- gc = ( *++spt == '-' );
+#if 0
+ fp = fopen( "infile.tree", "a" ); // contention!!
+ if( fp == 0 )
{
- if( gb * !gc ) *fpt += feff;
- fpt++;
+ reporterr( "File error!\n" );
+ exit( 1 );
}
- }
-#if 0
- {
- gb = gc;
- gc = ( egappat[j] == '-' );
+ fprintf( fp, "\n" );
+ fprintf( fp, "%8d: %s\n", norg+iadd+1, name[norg+iadd] );
+ fprintf( fp, " nearest sequence: %d\n", nearest + 1 );
+ fprintf( fp, " cousin: " );
+ for( j=0; topolc[posinnew][0][j]!=-1; j++ )
+ fprintf( fp, "%d ", topolc[posinnew][0][j]+1 );
+ fprintf( fp, "\n" );
+ fclose( fp );
+#else
+ addtree[iadd].nearest = nearesto;
+ addtree[iadd].dist1 = minscoreo;
+ addtree[iadd].dist2 = minscore;
+ neighborlist[0] = 0;
+ npt = neighborlist;
+ for( j=0; topolc[posinnew][0][j]!=-1; j++ )
{
- if( gb * !gc ) *fpt += feff;
+ sprintf( npt, "%d ", topolc[posinnew][0][j]+1 ); // 1-based member numbers separated by spaces
+ npt += strlen( npt );
}
- }
+ addtree[iadd].neighbors = calloc( npt-neighborlist+1, sizeof( char ) );
+ strcpy( addtree[iadd].neighbors, neighborlist );
#endif
+ }
+
+// reporterr( "STEP %d\n", posinnew );
+// for( j=0; topolc[posinnew][0][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][0][j] );
+// reporterr( "\n len=%f", lenc[posinnew][0] );
+// for( j=0; topolc[posinnew][1][j]!=-1; j++ ) reporterr( " %d", topolc[posinnew][1][j] );
+// reporterr( "\n len=%f\n", lenc[posinnew][1] );
}
- for( j=0; j<len; j++ )
- {
- fprintf( stderr, "%d, %f\n", j, fgcp[j] );
- }
-}
-void getdigapfreq_st( float *freq, int clus, char **seq, double *eff, int len )
-{
- int i, j;
- float feff;
- for( i=0; i<len+1; i++ ) freq[i] = 0.0;
- for( i=0; i<clus; i++ )
+ if( topoldum0 ) free( topoldum0 );
+ if( topoldum1 ) free( topoldum1 );
+ free( leaf2node );
+ free( additionaltopol );
+ free( ac );
+ free( acori );
+ if( treeout ) free( neighborlist );
+
+#if 0 // create a newick tree for CHECK
+ char **tree;
+ char *treetmp;
+ int im, jm;
+
+ treetmp = AllocateCharVec( njob*150 );
+ tree = AllocateCharMtx( njob, njob*150 );
+ for( i=0; i<njobc; i++ ) sprintf( tree[i], "%d", i+1 );
+
+ for( i=0; i<njobc-1; i++ )
{
- feff = eff[i];
- if( 0 && seq[i][0] == '-' ) // machigai kamo
- freq[0] += feff;
- for( j=1; j<len; j++ )
- {
- if( seq[i][j] == '-' && seq[i][j-1] == '-' )
- freq[j] += feff;
- }
- if( 0 && seq[i][len-1] == '-' )
- freq[len] += feff;
+ reporterr( "\nSTEP %d\n", i );
+ for( j=0; topolc[i][0][j]!=-1; j++ ) reporterr( " %d", topolc[i][0][j] );
+ reporterr( "\n len=%f\n", lenc[i][0] );
+ for( j=0; topolc[i][1][j]!=-1; j++ ) reporterr( " %d", topolc[i][1][j] );
+ reporterr( "\n len=%f\n", lenc[i][1] );
+
+ im = topolc[i][0][0];
+ jm = topolc[i][1][0];
+ sprintf( treetmp, "(%s:%7.5f,%s:%7.5f)", tree[im], lenc[i][0], tree[jm], lenc[i][1] );
+ strcpy( tree[im], treetmp );
+
}
-// fprintf( stderr, "\ndigapf = \n" );
-// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
+
+ reporterr( "%s\n", treetmp );
+ FreeCharMtx( tree );
+ free( treetmp );
+#endif
+
+ return( neighbor );
}
-void getdiaminofreq_x( float *freq, int clus, char **seq, double *eff, int len )
+#if 0
+int samemember( int *mem, int *cand ) // Disabled variant: compares the two lists position by position, so it only matches when the members are in the same order.
{
int i, j;
- float feff;
- for( i=0; i<len+2; i++ ) freq[i] = 0.0;
- for( i=0; i<clus; i++ )
+
+#if 0
+ reporterr( "mem = " );
+ for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
+ reporterr( "\n" );
+
+ reporterr( "cand = " );
+ for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] );
+ reporterr( "\n" );
+#endif
+
+ for( i=0, j=0; mem[i]>-1; )
{
- feff = eff[i];
- if( seq[i][0] != '-' ) // tadashii
- freq[0] += feff;
- for( j=1; j<len; j++ )
- {
- if( seq[i][j] != '-' && seq[i][j-1] != '-' )
- freq[j] += feff;
- }
- if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
- freq[len] += feff;
+ if( mem[i++] != cand[j++] ) return( 0 );
}
-// fprintf( stderr, "\ndiaaf = \n" );
-// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
-}
-void getdiaminofreq_st( float *freq, int clus, char **seq, double *eff, int len )
-{
- int i, j;
- float feff;
- for( i=0; i<len+1; i++ ) freq[i] = 0.0;
- for( i=0; i<clus; i++ )
+ if( cand[j] == -1 ) // cand must end where mem ends
{
- feff = eff[i];
- if( seq[i][0] != '-' )
- freq[0] += feff;
- for( j=1; j<len; j++ )
- {
- if( seq[i][j] != '-' && seq[i][j-1] != '-' )
- freq[j] += feff;
- }
-// if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
- freq[len] += feff;
+ return( 1 );
}
-// fprintf( stderr, "\ndiaaf = \n" );
-// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
-}
-
-void getdigapfreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
-{
- int i, j;
- float feff;
- for( i=0; i<len+2; i++ ) freq[i] = 0.0;
- for( i=0; i<clus; i++ )
+ else
{
- feff = eff[i];
-// if( seq[i][0] == '-' )
- if( seq[i][0] == '-' && sgappat[i] == '-' )
- freq[0] += feff;
- for( j=1; j<len; j++ )
- {
- if( seq[i][j] == '-' && seq[i][j-1] == '-' )
- freq[j] += feff;
- }
-// if( seq[i][len] == '-' && seq[i][len-1] == '-' ) // xxx wo tsukawanaitoki arienai
- if( egappat[i] == '-' && seq[i][len-1] == '-' )
- freq[len] += feff;
+ return( 0 );
}
-// fprintf( stderr, "\ndigapf = \n" );
-// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
-
-void getdiaminofreq_part( float *freq, int clus, char **seq, double *eff, int len, char *sgappat, char *egappat )
+#else
+int samemember( int *mem, int *cand ) // Returns 1 if mem and cand (lists ending at the first negative entry) have the same number of members and every member of mem occurs in cand, in any order; 0 otherwise.
{
int i, j;
- float feff;
- for( i=0; i<len+2; i++ ) freq[i] = 0.0;
- for( i=0; i<clus; i++ )
+ int nm, nc;
+
+ nm = 0; for( i=0; mem[i]>-1; i++ ) nm++; // number of members in mem
+ nc = 0; for( i=0; cand[i]>-1; i++ ) nc++; // number of members in cand
+
+ if( nm != nc ) return( 0 );
+
+ for( i=0; mem[i]>-1; i++ )
{
- feff = eff[i];
- if( seq[i][0] != '-' && sgappat[i] != '-' )
- freq[0] += feff;
- for( j=1; j<len; j++ )
- {
- if( seq[i][j] != '-' && seq[i][j-1] != '-' )
- freq[j] += feff;
- }
-// if( 1 && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
- if( egappat[i] != '-' && seq[i][len-1] != '-' ) // xxx wo tsukawanaitoki [len-1] nomi
- freq[len] += feff;
+ for( j=0; cand[j]>-1; j++ )
+ if( mem[i] == cand[j] ) break;
+ if( cand[j] == -1 ) return( 0 ); // reached the end of cand without finding mem[i]
}
-// fprintf( stderr, "\ndiaaf = \n" );
-// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
-}
-void getgapfreq_zure_part( float *freq, int clus, char **seq, double *eff, int len, char *sgap )
-{
- int i, j;
- float feff;
- for( i=0; i<len+2; i++ ) freq[i] = 0.0;
- for( i=0; i<clus; i++ )
+ if( mem[i] == -1 ) // the loop above stopped at a terminator that is exactly -1
{
- feff = eff[i];
- if( sgap[i] == '-' )
- freq[0] += feff;
- for( j=0; j<len; j++ )
- {
- if( seq[i][j] == '-' )
- freq[j+1] += feff;
- }
-// if( egap[i] == '-' )
-// freq[len+1] += feff;
+#if 0
+ reporterr( "mem = " );
+ for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
+ reporterr( "\n" );
+
+ reporterr( "cand = " );
+ for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] );
+ reporterr( "\n" );
+#endif
+ return( 1 );
}
-// fprintf( stderr, "\ngapf = \n" );
-// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
-}
-
-void getgapfreq_zure( float *freq, int clus, char **seq, double *eff, int len )
-{
- int i, j;
- float feff;
- for( i=0; i<len+1; i++ ) freq[i] = 0.0;
- for( i=0; i<clus; i++ )
+ else
{
- feff = eff[i];
- for( j=0; j<len; j++ )
- {
- if( seq[i][j] == '-' )
- freq[j+1] += feff;
- }
+ return( 0 );
}
- freq[len+1] = 0.0;
-// fprintf( stderr, "\ngapf = \n" );
-// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
}
+#endif
-void getgapfreq( float *freq, int clus, char **seq, double *eff, int len )
+int samemembern( int *mem, int *cand, int nc ) // Returns 1 if mem (a list ending at the first negative entry) has exactly nc members and each of them occurs among cand[0..nc-1]; 0 otherwise.
{
int i, j;
- float feff;
- for( i=0; i<len+1; i++ ) freq[i] = 0.0;
- for( i=0; i<clus; i++ )
+ int nm;
+
+ nm = 0;
+ for( i=0; mem[i]>-1; i++ )
{
- feff = eff[i];
- for( j=0; j<len; j++ )
- {
- if( seq[i][j] == '-' )
- freq[j] += feff;
- }
+ nm++;
+ if( nm > nc ) return( 0 ); // stop counting as soon as mem is known to have more than nc members
}
- freq[len] = 0.0;
-// fprintf( stderr, "\ngapf = \n" );
-// for( i=0; i<len+1; i++ ) fprintf( stderr, "%5.3f ", freq[i] );
-}
-void st_getGapPattern( Gappat **pat, int clus, char **seq, double *eff, int len )
-{
- int i, j, k, gb, gc;
- int known;
- float feff;
- Gappat **fpt;
- char *spt;
- int gaplen;
+ if( nm != nc ) return( 0 );
- fpt = pat;
- i = len+1;
- while( i-- )
+ for( i=0; mem[i]>-1; i++ )
{
- if( *fpt ) free( *fpt );
- *fpt++ = NULL;
+ for( j=0; j<nc; j++ )
+ if( mem[i] == cand[j] ) break;
+ if( j == nc ) return( 0 ); // mem[i] was not found in cand
}
- for( j=0; j<clus; j++ )
+ if( mem[i] == -1 ) // the loop above stopped at a terminator that is exactly -1
{
-// fprintf( stderr, "seq[%d] = %s\n", j, seq[j] );
- feff = (float)eff[j];
-
- fpt = pat;
- *fpt = NULL; // Falign.c kara yobareru tokiha chigau.
- spt = seq[j];
- gc = 0;
- gaplen = 0;
-
- for( i=0; i<len+1; i++ )
-// while( i-- )
- {
-// fprintf( stderr, "i=%d, gaplen = %d\n", i, gaplen );
- gb = gc;
- gc = ( i != len && *spt++ == '-' );
- if( gc )
- gaplen++;
- else
- {
- if( gb && gaplen )
- {
- k = 1;
- known = 0;
- if( *fpt ) for( ; (*fpt)[k].len != -1; k++ )
- {
- if( (*fpt)[k].len == gaplen )
- {
-// fprintf( stderr, "known\n" );
- known = 1;
- break;
- }
- }
-
- if( known == 0 )
- {
- *fpt = (Gappat *)realloc( *fpt, (k+3) * sizeof( Gappat ) ); // mae1 (total), ato2 (len0), term
- if( !*fpt )
- {
- fprintf( stderr, "Cannot allocate gappattern!'n" );
- fprintf( stderr, "Use an approximate method, with the --mafft5 option.\n" );
- exit( 1 );
- }
- (*fpt)[k].freq = 0.0;
- (*fpt)[k].len = gaplen;
- (*fpt)[k+1].len = -1;
- (*fpt)[k+1].freq = 0.0; // iranai
-// fprintf( stderr, "gaplen=%d, Unknown, %f\n", gaplen, (*fpt)[k].freq );
- }
-
-// fprintf( stderr, "adding pos %d, len=%d, k=%d, freq=%f->", i, gaplen, k, (*fpt)[k].freq );
- (*fpt)[k].freq += feff;
-// fprintf( stderr, "%f\n", (*fpt)[k].freq );
- gaplen = 0;
- }
- }
- fpt++;
- }
+#if 0
+ reporterr( "mem = " );
+ for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
+ reporterr( "\n" );
+
+ reporterr( "cand = " );
+ for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] ); // NOTE(review): if re-enabled, this walks cand up to a negative entry, while the rest of this function only reads cand[0..nc-1]
+ reporterr( "\n" );
+#endif
+ return( 1 );
}
-#if 1
- for( j=0; j<len+1; j++ )
+ else
{
- if( pat[j] )
- {
-// fprintf( stderr, "j=%d\n", j );
-// for( i=1; pat[j][i].len!=-1; i++ )
-// fprintf( stderr, "pos=%d, i=%d, len=%d, freq=%f\n", j, i, pat[j][i].len, pat[j][i].freq );
+ return( 0 );
+ }
+}
- pat[j][0].len = 0; // iminashi
- pat[j][0].freq = 0.0;
- for( i=1; pat[j][i].len!=-1;i++ )
- {
- pat[j][0].freq += pat[j][i].freq;
-// fprintf( stderr, "totaling, i=%d, result = %f\n", i, pat[j][0].freq );
- }
-// fprintf( stderr, "totaled, result = %f\n", pat[j][0].freq );
- pat[j][i].freq = 1.0 - pat[j][0].freq;
- pat[j][i].len = 0; // imiari
- pat[j][i+1].len = -1;
- }
- else
- {
- pat[j] = (Gappat *)calloc( 3, sizeof( Gappat ) );
- pat[j][0].freq = 0.0;
- pat[j][0].len = 0; // iminashi
+int includemember( int *mem, int *cand ) // mem in cand: returns 1 if every member of mem occurs in cand, 0 otherwise (both lists end at the first negative entry)
+{
+ int i, j;
- pat[j][1].freq = 1.0 - pat[j][0].freq;
- pat[j][1].len = 0; // imiari
- pat[j][2].len = -1;
- }
- }
+#if 0
+ reporterr( "mem = " );
+ for( i=0; mem[i]>-1; i++ ) reporterr( "%d ", mem[i] );
+ reporterr( "\n" );
+
+ reporterr( "cand = " );
+ for( i=0; cand[i]>-1; i++ ) reporterr( "%d ", cand[i] );
+ reporterr( "\n" );
#endif
+
+ for( i=0; mem[i]>-1; i++ )
+ {
+ for( j=0; cand[j]>-1; j++ )
+ if( mem[i] == cand[j] ) break;
+ if( cand[j] == -1 ) return( 0 ); // reached the end of cand without finding mem[i]
+ }
+// reporterr( "INCLUDED! mem[0]=%d\n", mem[0] );
+ return( 1 );
}
-static void commongappickpair( char *r1, char *r2, char *i1, char *i2 )
+// Return 1 when the two lists share at least one entry, otherwise 0.
+// Each list is scanned up to its first negative entry.
+int overlapmember( int *mem1, int *mem2 )
{
-// strcpy( r1, i1 );
-// strcpy( r2, i2 );
-// return; // not SP
- while( *i1 )
+    int *p1, *p2;
+
+    for( p1=mem1; *p1>-1; p1++ )
+    {
+        p2 = mem2;
+        while( *p2 > -1 )
+        {
+            if( *p1 == *p2 ) return( 1 );
+            p2++;
+        }
+    }
+    return( 0 );
+}
+void gapcount( double *freq, char **seq, int nseq, double *eff, int lgth )
+{
+ int i, j;
+ double fr;
+
+// for( i=0; i<lgth; i++ ) freq[i] = 0.0;
+// return;
+
+ for( i=0; i<lgth; i++ )
{
- if( *i1 == '-' && *i2 == '-' )
- {
- i1++;
- i2++;
- }
- else
+ fr = 0.0;
+ for( j=0; j<nseq; j++ )
{
- *r1++ = *i1++;
- *r2++ = *i2++;
+ if( seq[j][i] == '-' ) fr += eff[j];
}
+ freq[i] = fr;
+// reporterr( "freq[%d] = %f\n", i, freq[i] );
}
- *r1 = 0;
- *r2 = 0;
+// reporterr( "\n" );
+ return;
}
-float naiveRpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
+void gapcountadd( double *freq, char **seq, int nseq, double *eff, int lgth )
{
-// return( 0 );
- int i, j;
- float val;
- float valf;
- int pv;
- double deff;
- char *p1, *p2, *p1p, *p2p;
- val = 0.0;
- for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ int i;
+ int j = nseq-1;
+ double newfr = eff[j];
+ double orifr = 1.0 - newfr;
+
+// for( i=0; i<lgth; i++ ) freq[i] = 0.0;
+// return;
+// for( i=0; i<nseq; i++ )
+// reporterr( "%s\n", seq[i] );
+
+ for( i=0; i<lgth; i++ )
{
- deff = eff1[i] * eff2[j];
-// fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
-// fprintf( stderr, "i1 = %s\n", seq1[i] );
-// fprintf( stderr, "i2 = %s\n", seq2[j] );
-// fprintf( stderr, "s1 = %s\n", s1 );
-// fprintf( stderr, "s2 = %s\n", s2 );
-// fprintf( stderr, "penal = %d\n", penal );
+// reporterr( "freq[%d] = %f", i, freq[i] );
+ freq[i] = 1.0 - freq[i]; // modosu
+ freq[i] *= orifr;
- valf = 0;
- p1 = seq1[i]; p2 = seq2[j];
- pv = 0;
- if( *p1 == '-' && *p2 != '-' )
- pv = penal;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal;
-// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
- p1p = p1; p2p = p2;
- valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
- while( *p1p )
- {
- pv = 0;
- if( *p1p != '-' && *p2p != '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- pv = penal;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal;
- if( *p1 != '-' && *p2 != '-' )
- ;
- if( *p1 == '-' && *p2 == '-' )
- ;
- }
- if( *p1p == '-' && *p2p == '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- pv = penal;
-// ;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal;
-// ;
- if( *p1 != '-' && *p2 != '-' )
- ;
- if( *p1 == '-' && *p2 == '-' )
- ;
- }
- if( *p1p != '-' && *p2p == '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- pv = penal * 2; // ??
-// ;
- if( *p1 != '-' && *p2 == '-' )
- ;
- if( *p1 != '-' && *p2 != '-' )
- pv = penal;
-// ;
- if( *p1 == '-' && *p2 == '-' )
- pv = penal;
-// ;
- }
- if( *p1p == '-' && *p2p != '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- ;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal * 2; // ??
-// ;
- if( *p1 != '-' && *p2 != '-' )
- pv = penal;
-// ;
- if( *p1 == '-' && *p2 == '-' )
- pv = penal;
-// ;
- }
-// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
-// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
- valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
- p1p++; p2p++;
- }
-// fprintf( stderr, "valf = %d\n", valf );
- val += deff * ( valf );
+ if( seq[j][i] == '-' ) freq[i] += newfr;
+// reporterr( "-> %f\n", i, freq[i] );
}
- fprintf( stderr, "val = %f\n", val );
- return( val );
-// exit( 1 );
+// reporterr( "\n" );
+ return;
}
-float naiveQpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
+void gapcountf( double *freq, char **seq, int nseq, double *eff, int lgth )
{
int i, j;
- float val;
- float valf;
- int pv;
- double deff;
- char *p1, *p2, *p1p, *p2p;
- return( 0 );
- val = 0.0;
- for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
- {
- deff = eff1[i] * eff2[j];
-// fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
-// fprintf( stderr, "i1 = %s\n", seq1[i] );
-// fprintf( stderr, "i2 = %s\n", seq2[j] );
-// fprintf( stderr, "s1 = %s\n", s1 );
-// fprintf( stderr, "s2 = %s\n", s2 );
-// fprintf( stderr, "penal = %d\n", penal );
+ double fr;
- valf = 0;
- p1 = seq1[i]; p2 = seq2[j];
- pv = 0;
- if( *p1 == '-' && *p2 != '-' )
- pv = penal;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal;
-// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
- p1p = p1; p2p = p2;
- valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
- while( *p1p )
+// for( i=0; i<lgth; i++ ) freq[i] = 0.0;
+// return;
+
+ for( i=0; i<lgth; i++ )
+ {
+ fr = 0.0;
+ for( j=0; j<nseq; j++ )
{
- pv = 0;
- if( *p1p != '-' && *p2p != '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- pv = penal;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal;
- if( *p1 != '-' && *p2 != '-' )
- ;
- if( *p1 == '-' && *p2 == '-' )
- ;
- }
- if( *p1p == '-' && *p2p == '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
-// pv = penal;
- ;
- if( *p1 != '-' && *p2 == '-' )
-// pv = penal;
- ;
- if( *p1 != '-' && *p2 != '-' )
- ;
- if( *p1 == '-' && *p2 == '-' )
- ;
- }
- if( *p1p != '-' && *p2p == '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- pv = penal * 2; // ??
-// ;
- if( *p1 != '-' && *p2 == '-' )
- ;
- if( *p1 != '-' && *p2 != '-' )
- pv = penal;
-// ;
- if( *p1 == '-' && *p2 == '-' )
-// pv = penal;
- ;
- }
- if( *p1p == '-' && *p2p != '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- ;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal * 2; // ??
-// ;
- if( *p1 != '-' && *p2 != '-' )
- pv = penal;
-// ;
- if( *p1 == '-' && *p2 == '-' )
-// pv = penal;
- ;
- }
-// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
-// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
- valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
- p1p++; p2p++;
+ if( seq[j][i] == '-' ) fr += eff[j];
}
-// fprintf( stderr, "valf = %d\n", valf );
- val += deff * ( valf );
+ freq[i] = fr;
+// reporterr( "in gapcountf, freq[%d] = %f\n", i, freq[i] );
}
- fprintf( stderr, "val = %f\n", val );
- return( val );
-// exit( 1 );
+// reporterr( "\n" );
+ return;
}
-float naiveHpairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
+
+void outgapcount( double *freq, int nseq, char *gappat, double *eff )
{
- int i, j;
- float val;
- float valf;
- int pv;
-// float feff = 0.0; // by D.Mathog, a guess
- double deff;
- char *p1, *p2, *p1p, *p2p;
- val = 0.0;
- for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ int j;
+ double fr;
+
+ fr = 0.0;
+ for( j=0; j<nseq; j++ )
{
- deff = eff1[i] * eff2[j];
-// fprintf( stderr, "i1 = %s\n", seq1[i] );
-// fprintf( stderr, "i2 = %s\n", seq2[j] );
-// fprintf( stderr, "s1 = %s\n", s1 );
-// fprintf( stderr, "s2 = %s\n", s2 );
-// fprintf( stderr, "penal = %d\n", penal );
+ if( gappat[j] == '-' ) fr += eff[j];
+ }
+ *freq = fr;
+ return;
+}
- valf = 0;
- p1 = seq1[i]; p2 = seq2[j];
- pv = 0;
- if( *p1 == '-' && *p2 != '-' )
- pv = penal;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal;
- if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, (int)(p1-seq1[i]), (int)(p2-seq2[j]) );
- p1p = p1; p2p = p2;
- valf += (float)amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
- while( *p1p )
- {
- pv = 0;
- if( *p1p != '-' && *p2p != '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- pv = penal;
- if( *p1 != '-' && *p2 == '-' )
- pv = penal;
- if( *p1 != '-' && *p2 != '-' )
- ;
- if( *p1 == '-' && *p2 == '-' )
- ;
- }
- if( *p1p == '-' && *p2p == '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
-// pv = penal;
- ;
- if( *p1 != '-' && *p2 == '-' )
-// pv = penal;
- ;
- if( *p1 != '-' && *p2 != '-' )
- ;
- if( *p1 == '-' && *p2 == '-' )
- ;
- }
- if( *p1p != '-' && *p2p == '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
-// pv = penal;
- ;
- if( *p1 != '-' && *p2 == '-' )
- ;
- if( *p1 != '-' && *p2 != '-' )
- pv = penal;
- if( *p1 == '-' && *p2 == '-' )
-// pv = penal;
- ;
- }
- if( *p1p == '-' && *p2p != '-' )
- {
- if( *p1 == '-' && *p2 != '-' )
- ;
- if( *p1 != '-' && *p2 == '-' )
-// pv = penal;
- ;
- if( *p1 != '-' && *p2 != '-' )
- pv = penal;
- if( *p1 == '-' && *p2 == '-' )
-// pv = penal;
- ;
- }
-// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
-// if( pv ) fprintf( stderr, "Penal!, %f, %d-%d, pos1,pos2=%d,%d\n", pv * deff * 0.5, i, j, p1-seq1[i], p2-seq2[j] );
- valf += amino_dis[(int)*p1++][(int)*p2++] + 0.5 * pv;
- p1p++; p2p++;
+// Turn a distance into a non-positive offset: half the distance minus the
+// global specificityconsideration, clamped so the result never exceeds 0.0.
+// The factor 0.5 is used because dist ranges over 0..2 (original author's note).
+double dist2offset( double dist )
+{
+    double shifted;
+
+    shifted = dist * 0.5 - specificityconsideration;
+//  shifted = dist * 1.0 - specificityconsideration; // alternative scaling, disabled
+    return( shifted > 0.0 ? 0.0 : shifted );
+}
+
+void makedynamicmtx( double **out, double **in, double offset )
+{
+ int i, j, ii, jj;
+ double av;
+
+ offset = dist2offset( offset * 2.0 ); // offset 0..1 -> 0..2
+
+// if( offset > 0.0 ) offset = 0.0;
+// reporterr( "dynamic offset = %f\n", offset );
+
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<nalphabets; j++ )
+ {
+ out[i][j] = in[i][j];
+ }
+ if( offset == 0.0 ) return;
+
+ for( i=0; i<nalphabets; i++ )
+ {
+ ii = (int)amino[i];
+ if( ii == '-' ) continue; // text no toki arieru
+ for( j=0; j<nalphabets; j++ )
+ {
+ jj = (int)amino[j];
+ if( jj == '-' ) continue; // text no toki arieru
+ out[i][j] = in[i][j] + offset * 600;
+// reporterr( "%c-%c: %f\n", ii, jj, out[i][j] );
+ }
+ }
+
+// reporterr( "offset = %f\n", offset );
+// reporterr( "out[W][W] = %f\n", out[amino_n['W']][amino_n['W']] );
+// reporterr( "out[A][A] = %f\n", out[amino_n['A']][amino_n['A']] );
+
+
+ return;
+
+// Taikaku youso no heikin ga 600 ni naruyouni re-scale.
+// Hitaikaku youso ga ookiku narisugi.
+
+ av = 0.0;
+ for( i=0; i<nalphabets; i++ )
+ {
+ if( ii == '-' ) continue; // text no toki arieru
+ av += out[i][i];
+ }
+ av /= (double)nalphabets;
+
+ for( i=0; i<nalphabets; i++ )
+ {
+ if( amino[i] == '-' ) continue; // text no toki arieru
+ for( j=0; j<nalphabets; j++ )
+ {
+ if( amino[j] == '-' ) continue; // text no toki arieru
+ out[i][j] = out[i][j] * 600 / av;
+ reporterr( "%c-%c: %f\n", amino[i], amino[j], out[i][j] );
}
-// fprintf( stderr, "valf = %d\n", valf );
- val += deff * ( valf );
}
- fprintf( stderr, "val = %f\n", val );
- return( val );
-// exit( 1 );
+}
+// Release the shared commonIP matrix when one is allocated, then reset the
+// pointer and both recorded allocation sizes.
+void FreeCommonIP()
+{
+    if( commonIP != NULL )
+    {
+        FreeIntMtx( commonIP );
+    }
+    commonAlloc2 = 0;
+    commonAlloc1 = 0;
+    commonIP = NULL;
}
-float naivepairscore11( char *seq1, char *seq2, int penal )
+void makeskiptable( int n, int **skip, char **seq )
{
- float vali;
- int len = strlen( seq1 );
- char *s1, *s2, *p1, *p2;
- s1 = calloc( len+1, sizeof( char ) );
- s2 = calloc( len+1, sizeof( char ) );
+ char *nogapseq;
+ int nogaplen, alnlen;
+ int i, j, posinseq, gaplen;
+
+ nogapseq = calloc( strlen( seq[0] )+1, sizeof( char ) );
+ for( i=0; i<n; i++ )
{
- vali = 0.0;
- commongappickpair( s1, s2, seq1, seq2 );
-// fprintf( stderr, "###i1 = %s\n", seq1 );
-// fprintf( stderr, "###i2 = %s\n", seq2 );
-// fprintf( stderr, "###penal = %d\n", penal );
+ gappick0( nogapseq, seq[i] );
+ nogaplen = strlen( nogapseq );
+ alnlen = strlen( seq[i] );
+ skip[i] = calloc( nogaplen+1, sizeof( int ) );
- p1 = s1; p2 = s2;
- while( *p1 )
+// reporterr( "%s\n", nogapseq );
+
+ posinseq = 0;
+ gaplen = 0;
+ for( j=0; j<alnlen; j++ )
{
- if( *p1 == '-' )
+ if( seq[i][j] == '-' )
{
-// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
- vali += (float)penal;
-// while( *p1 == '-' || *p2 == '-' )
- while( *p1 == '-' ) // SP
- {
- p1++;
- p2++;
- }
- continue;
+ skip[i][posinseq]++;
}
- if( *p2 == '-' )
+ else
{
-// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
- vali += (float)penal;
-// while( *p2 == '-' || *p1 == '-' )
- while( *p2 == '-' ) // SP
- {
- p1++;
- p2++;
- }
- continue;
+ posinseq++;
}
-// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
- vali += (float)amino_dis[(int)*p1++][(int)*p2++];
}
+// for( j=0; j<nogaplen+1; j++ )
+// reporterr( "%d ", skip[i][j] );
+// reporterr( "\n" );
+// exit( 1 );
}
- free( s1 );
- free( s2 );
-// fprintf( stderr, "###vali = %d\n", vali );
- return( vali );
+ free( nogapseq );
}
-float naivepairscore( int n1, int n2, char **seq1, char **seq2, double *eff1, double *eff2, int penal )
+// Append a copy of the member list `members` (terminated by -1) to the
+// NULL-terminated table *tablept, then update the sub-alignment count and
+// the largest member count seen so far.  Exits on allocation failure.
+static void appendsubalignment( int ***tablept, int *nsubpt, int *maxmempt, int *members )
+{
+    int nmem;
+    int **grown;
+
+    for( nmem=0; members[nmem]!=-1; nmem++ )
+        ;
+
+    // Slots hold int *, so size them as such; keep the old pointer until
+    // realloc has succeeded so a failure is reported instead of dereferenced.
+    grown = realloc( *tablept, sizeof( int * ) * (*nsubpt+2) );
+    if( !grown )
+    {
+        reporterr( "Cannot allocate subalignment table\n" );
+        exit( 1 );
+    }
+    *tablept = grown;
+
+//  reporterr( "allocating %d\n", nmem+1 );
+    grown[*nsubpt] = calloc( nmem+1, sizeof( int ) );
+    if( !grown[*nsubpt] )
+    {
+        reporterr( "Cannot allocate subalignment members\n" );
+        exit( 1 );
+    }
+    grown[*nsubpt+1] = NULL;
+    intcpy( grown[*nsubpt], members );
+    if( *maxmempt < nmem ) *maxmempt = nmem;
+    *nsubpt += 1;
+}
+
+// Walk the nseq-1 merge steps in topol and collect, into *tablept, every
+// group that has at least two members and whose accumulated distance from the
+// tip crosses `threshold` at that step (before <= threshold < after).
+//   topol[i][0], topol[i][1] : -1-terminated member lists of the two groups
+//                              merged at step i; the first member represents
+//                              the group
+//   len[i][0], len[i][1]     : lengths added to the two representatives
+//   *nsubpt, *maxmempt       : reset to 0, then set to the number of groups
+//                              stored and the largest member count
+// Returns 1 when the final distance recorded for sequence 0 is <= threshold,
+// otherwise 0.
+int generatesubalignmentstable( int nseq, int ***tablept, int *nsubpt, int *maxmempt, int ***topol, double **len, double threshold )
{
-// return( 0.0 );
- int i, j;
- float val;
- int vali;
- float feff;
- int len = strlen( seq1[0] );
- char *s1, *s2, *p1, *p2;
- s1 = calloc( len+1, sizeof( char ) );
- s2 = calloc( len+1, sizeof( char ) );
- val = 0.0;
- for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+    int i, rep0, rep1;
+    int crossed;
+    double distfromtip0, distfromtip1;
+    double *distfromtip;
+    reporterr( "\n\n\n" );
+
+    *maxmempt = 0;
+    *nsubpt = 0;
+
+    distfromtip = calloc( nseq, sizeof( double ) );
+    if( !distfromtip )
+    {
+        reporterr( "Cannot allocate distfromtip\n" );
+        exit( 1 );
+    }
+    for( i=0; i<nseq-1; i++ )
{
- vali = 0;
- feff = eff1[i] * eff2[j];
-// fprintf( stderr, "feff %d-%d = %f\n", i, j, feff );
- commongappickpair( s1, s2, seq1[i], seq2[j] );
-// fprintf( stderr, "i1 = %s\n", seq1[i] );
-// fprintf( stderr, "i2 = %s\n", seq2[j] );
-// fprintf( stderr, "s1 = %s\n", s1 );
-// fprintf( stderr, "s2 = %s\n", s2 );
-// fprintf( stderr, "penal = %d\n", penal );
+#if 0
+        {
+            int j;
+            reporterr( "STEP %d\n", i );
+            for( j=0; topol[i][0][j]!=-1; j++ )
+                reporterr( "%3d ", topol[i][0][j] );
+            reporterr( "\n" );
+            reporterr( "len=%f\n", len[i][0] );
+        }
+#endif
- p1 = s1; p2 = s2;
- while( *p1 )
+        rep0 = topol[i][0][0];
+        distfromtip0 = distfromtip[rep0];
+        distfromtip[rep0] += len[i][0];
+//      reporterr( "distfromtip[%d] = %f->%f\n", rep0, distfromtip0, distfromtip[rep0] );
+
+
+#if 0
+        {
+            int j;
+            for( j=0; topol[i][1][j]!=-1; j++ )
+                reporterr( "%3d ", topol[i][1][j] );
+            reporterr( "\n" );
+            reporterr( "len=%f\n", len[i][1] );
+        }
+#endif
+
+        rep1 = topol[i][1][0];
+        distfromtip1 = distfromtip[rep1];
+        distfromtip[rep1] += len[i][1];
+//      reporterr( "distfromtip[%d] = %f->%f\n", rep1, distfromtip1, distfromtip[rep1] );
+
+        if( topol[i][0][1] != -1 && distfromtip0 <= threshold && threshold < distfromtip[rep0] )
{
- if( *p1 == '-' )
- {
-// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
- vali += penal;
-// while( *p1 == '-' || *p2 == '-' )
- while( *p1 == '-' ) // SP
- {
- p1++;
- p2++;
- }
- continue;
- }
- if( *p2 == '-' )
- {
-// fprintf( stderr, "Penal! %c-%c in %d-%d, %f\n", *(p1-1), *(p2-1), i, j, feff );
- vali += penal;
-// while( *p2 == '-' || *p1 == '-' )
- while( *p2 == '-' ) // SP
- {
- p1++;
- p2++;
- }
- continue;
- }
-// fprintf( stderr, "adding %c-%c, %d\n", *p1, *p2, amino_dis[*p1][*p2] );
- vali += amino_dis[(int)*p1++][(int)*p2++];
+//          reporterr( "HIT 0!\n" );
+            appendsubalignment( tablept, nsubpt, maxmempt, topol[i][0] );
}
-// fprintf( stderr, "vali = %d\n", vali );
- val += feff * vali;
+
+        if( topol[i][1][1] != -1 && distfromtip1 <= threshold && threshold < distfromtip[rep1] )
+        {
+//          reporterr( "HIT 1!\n" );
+            appendsubalignment( tablept, nsubpt, maxmempt, topol[i][1] );
+        }
+
}
- free( s1 );
- free( s2 );
- fprintf( stderr, "val = %f\n", val );
- return( val );
-// exit( 1 );
+
+    // Read the result before releasing the scratch array.
+    crossed = ( distfromtip[0] <= threshold ) ? 1 : 0;
+    free( distfromtip );
+    return( crossed );
}
-double plainscore( int nseq, char **s )
+
+
+// Sum naivepairscore11() over every unordered pair of the nseq sequences,
+// each term divided by 600 and scored with the global penalty.  Pairs are
+// visited as (1,0), (2,0), (2,1), ... and the sum is not normalized by the
+// number of pairs.
+double sumofpairsscore( int nseq, char **seq )
+{
+    double total = 0;
+    int hi, lo;
+
+    hi = 1;
+    while( hi < nseq )
+    {
+        lo = 0;
+        while( lo < hi )
+        {
+            total += naivepairscore11( seq[hi], seq[lo], penalty ) / 600;
+            lo++;
+        }
+        hi++;
+    }
+//  total /= ( (nseq-1) * nseq ) / 2;
+    return( total );
+}
+
+int commonsextet_p( int *table, int *pointt ) // counts entries of pointt matched by table: each distinct point contributes min( occurrences in pointt, table[point] )
+{
+ int value = 0;
+ int tmp;
+ int point;
+ static TLS int *memo = NULL; // per-point occurrence counter, tsize entries, kept between calls
+ static TLS int *ct = NULL; // list of the points touched in this call, used to reset memo
+ static TLS int *cp;
+// Calling with table == NULL only releases the scratch buffers and returns 0.
+ if( table == NULL )
+ {
+ if( memo ) free( memo );
+ if( ct ) free( ct );
+ memo = NULL;
+ ct = NULL;
+ return( 0 );
+ }
+// A list whose first entry is -1 scores 0 without touching the buffers.
+ if( *pointt == -1 )
+ return( 0 );
+// Scratch buffers are allocated on first use and reused afterwards.
+ if( !memo )
+ {
+ memo = (int *)calloc( tsize, sizeof( int ) );
+ if( !memo ) ErrorExit( "Cannot allocate memo\n" );
+ ct = (int *)calloc( MIN( maxl, tsize )+1, sizeof( int ) ); // caution!! sized from MIN( maxl, tsize ), plus one slot for the terminator
+ if( !ct ) ErrorExit( "Cannot allocate ct\n" );
+ }
+
+ cp = ct;
+ while( ( point = *pointt++ ) != END_OF_VEC )
+ {
+ tmp = memo[point]++; // tmp = number of earlier occurrences of this point in pointt
+ if( tmp < table[point] )
+ value++;
+ if( tmp == 0 ) *cp++ = point; // first occurrence: remember the point for the reset below
+ }
+ *cp = END_OF_VEC;
- ilim = nseq-1;
- for( i=0; i<ilim; i++ ) for( j=i+1; j<nseq; j++ )
+ cp = ct;
+ while( *cp != END_OF_VEC )
+ memo[*cp++] = 0; // clear only the touched counters so memo is all zero for the next call
+
+ return( value );
+}
+
+// Distance between two aligned sequences derived from naivepairscorefast()
+// (marked "slow!" by the original author).  The score, computed with
+// penalty_dist, is divided by MIN( ss1, ss2 ); the distance is
+// ( 1 - score/denominator ) * 2, capped at 10.0.  Returns 2.0 when the
+// denominator is 0.
+double distcompact_msa( char *seq1, char *seq2, int *skiptable1, int *skiptable2, int ss1, int ss2 )
+{
+    int denom = MIN( ss1, ss2 );
+    double dist;
+
+//  reporterr( "msa-based dist\n" );
+    if( denom != 0 )
{
- v += (double)naivepairscore11( s[i], s[j], penalty );
+        dist = ( 1.0 - (double)naivepairscorefast( seq1, seq2, skiptable1, skiptable2, penalty_dist ) / denom ) * 2.0; // 2014/Aug/15 fast
+        return( dist > 10 ? 10.0 : dist ); // cap added 2015/Mar/17
}
+    return( 2.0 );
+}
- fprintf( stderr, "penalty = %d\n", penalty );
+// Distance derived from commonsextet_p( table1, point2 ):
+//   ( 1 - count / MIN( ss1, ss2 ) ) * lenfac * 2
+// where lenfac is a length-correction factor computed from the longer and
+// shorter of len1/len2 and the globals lenfaca..lenfacd.  Returns 2.0 when
+// either ss1 or ss2 is 0.
+double distcompact( int len1, int len2, int *table1, int *point2, int ss1, int ss2 )
+{
+    double longer, shorter, lenfac, dist;
- return( v );
+
+    if( ss1 == 0 || ss2 == 0 )
+        return( 2.0 );
+
+    longer = (double)( len1 > len2 ? len1 : len2 );
+    shorter = (double)( len1 > len2 ? len2 : len1 );
+    lenfac = 1.0 / ( shorter / longer * lenfacd + lenfacb / ( longer + lenfacc ) + lenfaca );
+//  reporterr( "lenfac=%f\n", lenfac );
+//  reporterr( "commonsextet_p()=%d\n", commonsextet_p( table1, point2 ) );
+//  reporterr( "ss1=%d, ss2=%d\n", ss1, ss2 );
+//  reporterr( "val=%f\n", (1.0-(double)commonsextet_p( table1, point2 )/ss1) );
+
+    dist = ( 1.0 - (double)commonsextet_p( table1, point2 ) / MIN(ss1,ss2) ) * lenfac * 2.0;
+
+    return( dist ); // doubled since 2013/Oct/17
+}
+
+// Convert the residue-numbered region stored in tmpptr (start1..end1 on seq1,
+// start2..end2 on seq2) into column positions of the gapped strings seq1 and
+// seq2, written to *start1pt/*end1pt and *start2pt/*end2pt.  Residues are the
+// characters other than '-', numbered from 0.
+static void movereg( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt )
+{
+    int col;  // number of characters consumed so far
+    int nres; // index of the last residue consumed, -1 before the first one
+
+    col = 0;
+    nres = -1;
+    while( seq1[col] != 0 )
+    {
+        if( seq1[col++] != '-' ) nres++;
+        if( nres == tmpptr->start1 ) break;
+    }
+    *start1pt = col - 1;
+
+    if( tmpptr->start1 != tmpptr->end1 )
+    {
+        // Keep scanning from where the start search stopped.
+        while( seq1[col] != 0 )
+        {
+            if( seq1[col++] != '-' ) nres++;
+            if( nres == tmpptr->end1 ) break;
+        }
+        *end1pt = col - 1;
+    }
+    else
+        *end1pt = *start1pt;
+
+    col = 0;
+    nres = -1;
+    while( seq2[col] != 0 )
+    {
+        if( seq2[col++] != '-' ) nres++;
+        if( nres == tmpptr->start2 ) break;
+    }
+    *start2pt = col - 1;
+
+    if( tmpptr->start2 != tmpptr->end2 )
+    {
+        while( seq2[col] != 0 )
+        {
+            if( seq2[col++] != '-' ) nres++;
+            if( nres == tmpptr->end2 ) break;
+        }
+        *end2pt = col - 1;
+    }
+    else
+        *end2pt = *start2pt;
+}
+
+// Same conversion as movereg, with the stored region swapped: start2..end2 of
+// tmpptr is located on seq1 (written to *start1pt/*end1pt) and start1..end1
+// is located on seq2 (written to *start2pt/*end2pt).  Residues are the
+// characters other than '-', numbered from 0.
+static void movereg_swap( char *seq1, char *seq2, LocalHom *tmpptr, int *start1pt, int *start2pt, int *end1pt, int *end2pt )
+{
+    int col;  // number of characters consumed so far
+    int nres; // index of the last residue consumed, -1 before the first one
+
+    col = 0;
+    nres = -1;
+    while( seq1[col] != 0 )
+    {
+        if( seq1[col++] != '-' ) nres++;
+        if( nres == tmpptr->start2 ) break;
+    }
+    *start1pt = col - 1;
+
+    if( tmpptr->start2 != tmpptr->end2 )
+    {
+        // Keep scanning from where the start search stopped.
+        while( seq1[col] != 0 )
+        {
+            if( seq1[col++] != '-' ) nres++;
+            if( nres == tmpptr->end2 ) break;
+        }
+        *end1pt = col - 1;
+    }
+    else
+        *end1pt = *start1pt;
+
+    col = 0;
+    nres = -1;
+    while( seq2[col] != 0 )
+    {
+        if( seq2[col++] != '-' ) nres++;
+        if( nres == tmpptr->start1 ) break;
+    }
+    *start2pt = col - 1;
+
+    if( tmpptr->start1 != tmpptr->end1 )
+    {
+        while( seq2[col] != 0 )
+        {
+            if( seq2[col++] != '-' ) nres++;
+            if( nres == tmpptr->end1 ) break;
+        }
+        *end2pt = col - 1;
+    }
+    else
+        *end2pt = *start2pt;
}
+void fillimp( double **impmtx, double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 )
+{ // Fills impmtx (lgth1 x lgth2) with weighted importance from the local-homology lists of every pair (i in group 1, j in group 2). imp and forscore are not used in this body.
+ int i, j, k1, k2, start1, start2, end1, end2;
+ double effij, effijx, effij_kozo;
+ char *pt1, *pt2;
+ LocalHom *tmpptr;
+ void (*movefunc)(char *, char *, LocalHom *, int *, int *, int *, int * ); // movereg or movereg_swap, chosen below
+
+#if 0
+ fprintf( stderr, "eff1 in _init_strict = \n" );
+ for( i=0; i<clus1; i++ )
+ fprintf( stderr, "eff1[] = %f\n", eff1[i] );
+ for( i=0; i<clus2; i++ )
+ fprintf( stderr, "eff2[] = %f\n", eff2[i] );
+#endif
+
+ for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
+ impmtx[i][j] = 0.0;
+ effijx = 1.0 * fastathreshold; // common factor applied to every pair weight
+ for( i=0; i<clus1; i++ )
+ {
+ if( swaplist && swaplist[i] ) movefunc = movereg_swap; // per-sequence choice when a swaplist is given
+ else movefunc = movereg;
+ for( j=0; j<clus2; j++ )
+ {
+
+ if( swaplist == NULL && orinum1 && orinum2 ) // wasteful: the choice is recomputed for every (i,j) pair
+ {
+ if( orinum1[i]>orinum2[j] )
+ movefunc = movereg_swap;
+ else
+ movefunc = movereg;
+ }
+
+// effij = eff1[i] * eff2[j] * effijx;
+ effij = eff1[i] * eff2[j] * effijx;
+ effij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx; // weight used for entries whose korh is 'k'
+ tmpptr = localhom[i][j];
+ while( tmpptr ) // walk the linked list of regions for this pair
+ {
+// fprintf( stderr, "start1 = %d\n", tmpptr->start1 );
+// fprintf( stderr, "end1 = %d\n", tmpptr->end1 );
+// fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] );
+// fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] );
+
+ movefunc( seq1[i], seq2[j], tmpptr, &start1, &start2, &end1, &end2 ); // region bounds as column positions in the gapped sequences
+
+
+// fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] );
+// fprintf( stderr, "step 0\n" );
+ if( end1 - start1 != end2 - start2 )
+ {
+// fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
+ }
+
+ k1 = start1; k2 = start2;
+ pt1 = seq1[i] + k1;
+ pt2 = seq2[j] + k2;
+ while( *pt1 && *pt2 )
+ {
+ if( *pt1 != '-' && *pt2 != '-' )
+ {
+// Take care not to apply the weight twice.
+// impmtx[k1][k2] += tmpptr->wimportance * fastathreshold;
+// impmtx[k1][k2] += tmpptr->importance * effij;
+// impmtx[k1][k2] += tmpptr->fimportance * effij;
+ if( tmpptr->korh == 'k' )
+ impmtx[k1][k2] += tmpptr->importance * effij_kozo;
+ else
+ impmtx[k1][k2] += tmpptr->importance * effij;
+// fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen );
+// fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
+ k1++; k2++;
+ pt1++; pt2++;
+ }
+ else if( *pt1 != '-' && *pt2 == '-' )
+ {
+// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
+ k2++; pt2++; // step past the gap column in seq2 only
+ }
+ else if( *pt1 == '-' && *pt2 != '-' )
+ {
+// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
+ k1++; pt1++; // step past the gap column in seq1 only
+ }
+ else if( *pt1 == '-' && *pt2 == '-' )
+ {
+// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
+ k1++; pt1++;
+ k2++; pt2++;
+ }
+ if( k1 > end1 || k2 > end2 ) break; // stop once either cursor has left its region
+ }
+ tmpptr = tmpptr->next;
+ }
+ }
+ }
+#if 0
+ printf( "orinum1=%d, orinum2=%d\n", *orinum1, *orinum2 );
+ if( *orinum1 == 0 )
+ {
+ fprintf( stdout, "impmtx = \n" );
+ for( k2=0; k2<lgth2; k2++ )
+ fprintf( stdout, "%6.3f ", (double)k2 );
+ fprintf( stdout, "\n" );
+ for( k1=0; k1<lgth1; k1++ )
+ {
+ fprintf( stdout, "%d", k1 );
+ for( k2=0; k2<lgth2; k2++ )
+ fprintf( stdout, "%2.1f ", impmtx[k1][k2] );
+ fprintf( stdout, "\n" );
+ }
+ exit( 1 );
+ }
+#endif
+}
free( mtx );
}
-float *AllocateFloatVec( int l1 )
+double *AllocateFloatVec( int l1 )
{
- float *vec;
+ double *vec;
- vec = (float *)calloc( (unsigned int)l1, sizeof( float ) );
+ vec = (double *)calloc( (unsigned int)l1, sizeof( double ) );
if( vec == NULL )
{
fprintf( stderr, "Allocation error ( %d fload vec )\n", l1 );
return( vec );
}
-void FreeFloatVec( float *vec )
+void FreeFloatVec( double *vec )
{
free( (char *)vec );
}
-float **AllocateFloatHalfMtx( int ll1 )
+double **AllocateFloatHalfMtx( int ll1 )
{
- float **mtx;
+ double **mtx;
int i;
- mtx = (float **)calloc( (unsigned int)ll1+1, sizeof( float * ) );
+ mtx = (double **)calloc( (unsigned int)ll1+1, sizeof( double * ) );
if( mtx == NULL )
{
fprintf( stderr, "Allocation error ( %d fload halfmtx )\n", ll1 );
}
for( i=0; i<ll1; i++ )
{
- mtx[i] = (float *)calloc( ll1-i, sizeof( float ) );
+ mtx[i] = (double *)calloc( ll1-i, sizeof( double ) );
if( !mtx[i] )
{
- fprintf( stderr, "Allocation error( %d floathalfmtx )\n", ll1 );
+ fprintf( stderr, "Allocation error( %d doublehalfmtx )\n", ll1 );
exit( 1 );
}
}
return( mtx );
}
-float **AllocateFloatMtx( int ll1, int ll2 )
+double **AllocateFloatMtx( int ll1, int ll2 )
{
- float **mtx;
+ double **mtx;
int i;
- mtx = (float **)calloc( (unsigned int)ll1+1, sizeof( float * ) );
+ mtx = (double **)calloc( (unsigned int)ll1+1, sizeof( double * ) );
if( mtx == NULL )
{
fprintf( stderr, "Allocation error ( %d x %d fload mtx )\n", ll1, ll2 );
{
for( i=0; i<ll1; i++ )
{
- mtx[i] = (float *)calloc( ll2, sizeof( float ) );
+ mtx[i] = (double *)calloc( ll2, sizeof( double ) );
if( !mtx[i] )
{
- fprintf( stderr, "Allocation error( %d x %d floatmtx )\n", ll1, ll2 );
+ fprintf( stderr, "Allocation error( %d x %d doublemtx )\n", ll1, ll2 );
exit( 1 );
}
}
return( mtx );
}
-void FreeFloatHalfMtx( float **mtx, int n )
+void FreeFloatHalfMtx( double **mtx, int n )
{
int i;
for( i=0; i<n; i++ )
{
- if( mtx[i] ) FreeFloatVec( mtx[i] );
+ if( mtx[i] ) FreeFloatVec( mtx[i] ); mtx[i] = NULL;
}
free( mtx );
}
-void FreeFloatMtx( float **mtx )
+void FreeFloatMtx( double **mtx )
{
int i;
for( i=0; mtx[i]; i++ )
{
- FreeFloatVec( mtx[i] );
+ if( mtx[i] ) FreeFloatVec( mtx[i] ); mtx[i] = NULL;
}
free( mtx );
}
+// Allocate a zero-filled vector of ll1 ints.  Prints a message to stderr and
+// exits with status 1 when the vector cannot be allocated; never returns NULL.
+int *AllocateIntVecLarge( unsigned long long ll1 )
+{
+    int *vec = NULL;
+
+    // Where size_t is narrower than unsigned long long the count could be
+    // silently truncated by calloc's parameter conversion; treat a count that
+    // does not round-trip through size_t as an allocation failure instead.
+    if( (unsigned long long)(size_t)ll1 == ll1 )
+        vec = (int *)calloc( (size_t)ll1, sizeof( int ) );
+    if( vec == NULL )
+    {
+        // %llu: the argument is unsigned.
+        fprintf( stderr, "Allocation error( %llu int vec )\n", ll1 );
+        exit( 1 );
+    }
+    return( vec );
+}
+
int *AllocateIntVec( int ll1 )
{
int *vec;
free( (char *)vec );
}
-float **AllocateFloatTri( int ll1 )
+double **AllocateFloatTri( int ll1 )
{
- float **tri;
+ double **tri;
int i;
- tri = (float **)calloc( (unsigned int)ll1+1, sizeof( float * ) );
+ tri = (double **)calloc( (unsigned int)ll1+1, sizeof( double * ) );
if( !tri )
{
- fprintf( stderr, "Allocation error ( float tri )\n" );
+ fprintf( stderr, "Allocation error ( double tri )\n" );
exit( 1 );
}
for( i=0; i<ll1; i++ )
return( tri );
}
-void FreeFloatTri( float **tri )
+void FreeFloatTri( double **tri )
{
/*
- float **x;
+ double **x;
x = tri;
while( *tri != NULL ) free( *tri++ );
free( x );
}
if( ll2 )
{
- for( i=0; i<ll1; i++ )
- {
- mtx[i] = AllocateIntVec( ll2 );
- }
+ for( i=0; i<ll1; i++ ) mtx[i] = AllocateIntVec( ll2 );
+ }
+ else
+ {
+ for( i=0; i<ll1; i++ ) mtx[i] = NULL;
+ }
+ mtx[ll1] = NULL;
+ return( mtx );
+}
+
+int **AllocateIntMtxLarge( unsigned long long ll1, unsigned long long ll2 )
+{
+ unsigned long long i;
+ int **mtx;
+
+ mtx = (int **)calloc( ll1+1, sizeof( int * ) );
+ if( !mtx )
+ {
+ fprintf( stderr, "Allocation error( %lld x %lld int mtx )\n", ll1, ll2 );
+ exit( 1 );
+ }
+ if( ll2 )
+ {
+ for( i=0; i<ll1; i++ ) mtx[i] = AllocateIntVecLarge( ll2 );
+ }
+ else
+ {
+ for( i=0; i<ll1; i++ ) mtx[i] = NULL;
}
mtx[ll1] = NULL;
return( mtx );
free( cub );
}
-void freeintmtx( int **mtx, int ll1, int ll2 )
+void freeintmtx( int **mtx, int ll1 )
{
int i;
- for( i=0; i<ll1; i++ )
- free( (char *)mtx[i] );
+ for( i=0; i<ll1; i++ ) free( (char *)mtx[i] );
free( (char *)mtx );
}
int i;
for( i=0; mtx[i]; i++ )
- free( (char *)mtx[i] );
+ {
+ if( mtx[i] ) free( (char *)mtx[i] ); mtx[i] = NULL;
+ }
free( (char *)mtx );
}
{
double *vec;
- vec = (double *)calloc( ll1, sizeof( double ) );
+ vec = (double *)calloc( ll1, sizeof( double ) ); // filled with 0.0
return( vec );
}
int i;
for( i=0; cub[i]; i++ )
{
- FreeIntMtx( cub[i] );
+ if( cub[i] ) FreeIntMtx( cub[i] ); cub[i] = NULL;
}
free( cub );
}
+// Allocate a zero-filled half matrix of doubles: ll1 rows, row r holding
+// ll1-r entries, followed by a NULL row pointer at index ll1.  Prints a
+// message to stderr and exits with status 1 on allocation failure.
+double **AllocateDoubleHalfMtx( int ll1 )
+{
+    double **rows;
+    int r;
+
+    rows = (double **)calloc( (unsigned int)ll1+1, sizeof( double * ) );
+    if( !rows )
+    {
+        fprintf( stderr, "Allocation error ( %d double halfmtx )\n", ll1 );
+        exit( 1 );
+    }
+
+    r = 0;
+    while( r < ll1 )
+    {
+        rows[r] = (double *)calloc( ll1-r, sizeof( double ) );
+        if( rows[r] == NULL )
+        {
+            fprintf( stderr, "Allocation error( %d double halfmtx )\n", ll1 );
+            exit( 1 );
+        }
+        r++;
+    }
+    rows[ll1] = NULL; // terminator row
+    return( rows );
+}
+
double **AllocateDoubleMtx( int ll1, int ll2 )
{
int i;
return mtx;
}
+// Free the first n rows of a half matrix, clearing each row pointer, and
+// then free the row-pointer array itself.
+void FreeDoubleHalfMtx( double **mtx, int n )
+{
+    int r = 0;
+
+    while( r < n )
+    {
+        if( mtx[r] != NULL )
+            FreeFloatVec( mtx[r] );
+        mtx[r] = NULL; // cleared for every row, freed or not
+        r++;
+    }
+    free( mtx );
+}
void FreeDoubleMtx( double **mtx )
{
int i;
free( mtx );
}
-float ***AllocateFloatCub( int ll1, int ll2, int ll3 )
+double ***AllocateFloatCub( int ll1, int ll2, int ll3 )
{
int i;
- float ***cub;
+ double ***cub;
- cub = (float ***)calloc( ll1+1, sizeof( float ** ) );
+ cub = (double ***)calloc( ll1+1, sizeof( double ** ) );
if( !cub )
{
- fprintf( stderr, "cannot allocate float cube.\n" );
+ fprintf( stderr, "cannot allocate double cube.\n" );
exit( 1 );
}
for( i=0; i<ll1; i++ )
return( cub );
}
-void FreeFloatCub( float ***cub )
+void FreeFloatCub( double ***cub )
{
int i;
void ReallocateCharMtx( char **, int, int);
void FreeCharMtx( char ** );
-float *AllocateFloatVec( int );
-void FreeFloatVec( float * );
+double *AllocateFloatVec( int );
+void FreeFloatVec( double * );
-float **AllocateFloatHalfMtx( int );
-float **AllocateFloatMtx( int, int );
-void FreeFloatHalfMtx( float **, int );
-void FreeFloatMtx( float ** );
+double **AllocateFloatHalfMtx( int );
+double **AllocateFloatMtx( int, int );
+void FreeFloatHalfMtx( double **, int );
+void FreeFloatMtx( double ** );
-float **AlocateFloatTri( int );
-void FreeFloatTri( float ** );
+double **AlocateFloatTri( int );
+void FreeFloatTri( double ** );
int *AllocateIntVec( int );
+int *AllocateIntVecLarge( unsigned long long );
void FreeIntVec( int * );
int **AllocateIntMtx( int, int );
+int **AllocateIntMtxLarge( unsigned long long, unsigned long long );
void FreeIntMtx( int ** );
char ***AllocateCharCub( int, int, int );
double *AllocateDoubleVec( int );
void FreeDoubleVec( double * );
+double **AllocateDoubleHalfMtx( int );
double **AllocateDoubleMtx( int, int );
+void FreeDoubleHalfMtx( double **, int );
void FreeDoubleMtx( double ** );
double ***AllocateDoubleCub( int, int, int );
void FreeDoubleCub( double *** );
-float ***AllocateFloatCub( int, int, int );
-void FreeFloatCub( float *** );
+double ***AllocateFloatCub( int, int, int );
+void FreeFloatCub( double *** );
short *AllocateShortVec( int );
void FreeShortVec( short * );
short **AllocateShortMtx( int, int );
void FreeShortMtx( short ** );
+
+void freeintmtx( int **, int );
--argc;
goto nextoption;
case 't':
- nhomologs = atoi( *++argv );
+ nhomologs = myatoi( *++argv );
fprintf( stderr, "nhomologs = %d\n", nhomologs );
--argc;
goto nextoption;
case 'o':
- seedoffset = atoi( *++argv );
+ seedoffset = myatoi( *++argv );
fprintf( stderr, "seedoffset = %d\n", seedoffset );
--argc;
goto nextoption;
return( val );
}
-static void pairalign( char name[M][B], int nlen[M], char **seq, double *effarr, int alloclen )
+static void pairalign( char **name, int nlen[M], char **seq, double *effarr, int alloclen )
{
int i, j;
FILE *hat3p;
- float pscore = 0.0; // by D.Mathog
+ double pscore = 0.0; // by D.Mathog
static double *effarr1 = NULL;
static double *effarr2 = NULL;
char *aseq;
fprintf( stderr, "adding %d-%d\r", i, j );
- putlocalhom2( pseq[0], pseq[1], localhomtable[i]+j, 0, 0, (int)pscore, strlen( pseq[0] ) );
+ putlocalhom2( pseq[0], pseq[1], localhomtable[i]+j, 0, 0, (int)pscore, strlen( pseq[0] ), 'k' );
for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
{
if( tmpptr->opt == -1.0 ) continue;
int main( int argc, char *argv[] )
{
static int nlen[M];
- static char name[M][B], **seq;
+ static char **name, **seq;
static char **bseq;
static double *eff;
int i;
exit( 1 );
}
+ name = AllocateCharMtx( njob, B+1 );
seq = AllocateCharMtx( njob, nlenmax*9+1 );
bseq = AllocateCharMtx( njob, nlenmax*9+1 );
alloclen = nlenmax*9;
#if 0
Read( name, nlen, seq );
#else
- readData( infp, name, nlen, seq );
+ readData_pointer( infp, name, nlen, seq );
#endif
fclose( infp );
for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
- for( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\n%s\n", name[i]+1, bseq[i] );
+// for( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\n%s\n", name[i]+1, bseq[i] ); // CHUUI!!
+ for( i=0; i<njob; i++ ) fprintf( stdout, ">_seed_%s\n%s\n", name[i]+1, seq[i] );
pairalign( name, nlen, seq, eff, alloclen );
#! /usr/bin/env ruby
-#version 2, 2009/01/24
+#version 2, 2009/Jan/24
+#version 3, 2015/Dec/8
if ARGV.length == 1
#tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "").gsub(/:\-[0-9\.]+/, ":0.0" )
-tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "")
+#tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "")
+tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9\.]*[eE]-[0-9]*/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "")
STDERR.puts "Initial tree = " + tree
tree.sub!( /XXX/, memi[0].to_s )
- STDERR.puts "Tree = " + tree
+# STDERR.puts "Tree = " + tree
printf( "%5d %5d %10.5f %10.5f\n", memi[0], memi[1], leni[0], leni[1] )
static int usecache;
static char *whereispairalign;
+static char *odir;
+static char *pdir;
static double scale;
static int *alreadyoutput;
static int equivthreshold;
}
-static float calltmalign( char **mseq1, char **mseq2, double *equiv, char *fname1, char *chain1, char *fname2, char *chain2, int alloclen )
+static double calltmalign( char **mseq1, char **mseq2, double *equiv, char *fname1, char *chain1, char *fname2, char *chain2, int alloclen )
{
FILE *fp;
int res;
static char com[10000];
- float value;
+ double value;
char cachedir[10000];
char cachefile[10000];
int runnow;
// fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
// fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
- value = (float)naivepairscore11( *mseq1, *mseq2, penalty );
+ value = (double)naivepairscore11( *mseq1, *mseq2, penalty );
return( value );
}
-static float callrash( int mem1, int mem2, char **mseq1, char **mseq2, double *equiv, char *fname1, char *chain1, char *fname2, char *chain2, int alloclen )
+static double callrash( int mem1, int mem2, char **mseq1, char **mseq2, double *equiv, char *fname1, char *fname2, int alloclen )
{
FILE *fp;
- int res;
+// int res;
static char com[10000];
- float value;
+ double value;
char cachedir[10000];
char cachefile[10000];
int runnow;
if( usecache )
{
- sprintf( cachedir, "%s/.rashoutcache", getenv( "HOME" ) );
- sprintf( com, "mkdir -p %s", cachedir );
- system( com );
+// sprintf( cachedir, "tmp" );
+ sprintf( cachedir, "%s", pdir );
- sprintf( cachefile, "%s/%s%s-%s%s", cachedir, fname1, chain1, fname2, chain2 );
+ sprintf( cachefile, "%s/%s.%s.rash", cachedir, fname1, fname2 );
runnow = 0;
fp = fopen( cachefile, "r" );
- if( fp == NULL ) runnow = 1;
+ if( fp == NULL )
+ {
+ fprintf( stderr, "Cannot open %s\n", cachefile );
+ exit( 1 );
+ }
else
{
- fgets( com, 100, fp );
- if( strncmp( com, "successful", 10 ) ) runnow = 1;
fclose( fp );
}
}
else
{
- runnow = 1;
+ fprintf( stderr, "Not supported!\n" );
+ exit( 1 );
}
- if( runnow )
+#if 0
+ if( 0 )
{
#if 0
sprintf( com, "ln -s %s %s.pdb 2>_dum", fname1, fname1 );
}
else
+#endif
{
- fprintf( stderr, "Use cache!\n" );
- sprintf( com, "grep -v successful %s > %s.rashout", cachefile, pairid );
+ fprintf( stderr, "Use cache! cachefile = %s\n", cachefile );
+ sprintf( com, "cat %s > %s.rashout", cachefile, pairid );
system( com );
}
if( usecache && runnow )
{
- sprintf( com, "echo successful > %s", cachefile );
- system( com );
- sprintf( com, "cat %s.rashout >> %s", pairid, cachefile );
- system( com );
+ fprintf( stderr, "Okashii! usechache=%d, runnow=%d\n", usecache, runnow );
+ exit( 1 );
}
sprintf( com, "%s.rashout", pairid );
// fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
- value = (float)naivepairscore11( *mseq1, *mseq2, penalty );
+ value = (double)naivepairscore11( *mseq1, *mseq2, penalty );
return( value );
}
int linenum, istr, nstr;
FILE *checkfp;
char *sline;
- int use[1000];
+ int use[10000];
linenum = 0;
nstr = 0;
while( 1 )
}
}
-static void prepareash( FILE *fp, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt )
+static void prepareash( FILE *fp, char *inputfile, char ***strfiles, char ***chainids, char ***seqpt, char ***mseq1pt, char ***mseq2pt, double **equivpt, int *alloclenpt )
{
int i, res;
char *dumseq;
char fname[1000];
char command[1000];
int linenum, istr, nstr;
- FILE *checkfp;
+// FILE *checkfp;
char *sline;
- int use[1000];
+ int use[10000];
linenum = 0;
nstr = 0;
+
+ fprintf( stderr, "inputfile = %s\n", inputfile );
while( 1 )
{
fgets( line, 999, fp );
continue;
}
extractfirstword( sline );
+#if 0
checkfp = fopen( sline, "r" );
if( checkfp == NULL )
{
fprintf( stderr, "Cannot open %s.\n", sline );
exit( 1 );
}
-#if 0
- fgets( linec, 999, checkfp );
- if( strncmp( "HEADER ", linec, 7 ) )
- {
- fprintf( stderr, "Check the format of %s.\n", sline );
- exit( 1 );
- }
-#endif
if( checkcbeta( checkfp ) )
{
fprintf( stderr, "%s has no C-beta atoms.\n", sline );
else
nstr++;
fclose( checkfp );
+#else
+ nstr++;
+#endif
linenum++;
}
njob = nstr;
fgets( line, 999, fp );
if( feof( fp ) ) break;
sline = strip( line );
+ fprintf( stderr, "sline = %s\n", sline );
if( use[linenum++] )
{
(*chainids)[istr][0] = getchainid( sline );
extractfirstword( sline );
sprintf( fname, "%s", sline );
cutpath( fname );
+#if 0
sprintf( command, "cp %s %s.pdb", sline, fname );
system( command );
sprintf( command, "perl \"%s/clean.pl\" %s.pdb", whereispairalign, fname );
fprintf( stderr, "error: Install clean.pl\n" );
exit( 1 );
}
+#endif
strcpy( (*strfiles)[istr++], fname );
}
}
alreadyoutput = AllocateIntVec( njob );
for( i=0; i<njob; i++ ) alreadyoutput[i] = 0;
+ fprintf( stderr, "Running pdp_ash_batch.pl..\n" );
+// sprintf( command, "/opt/protein/share/domains/code/pdp_ash/pdp_ash_batch.pl -f %s -d tmp -i %d", inputfile, wheretooutput );
+ sprintf( command, "/opt/protein/share/mafftash/pdp_ash/pdp_ash_batch.pl -f %s -d %s -i %s", inputfile, pdir, odir );
+ res = system( command );
+ if( res )
+ {
+ fprintf( stderr, "Ask KM!\n" );
+ exit( 1 );
+ }
+ fprintf( stderr, "done\n" );
+
+
for( i=0; i<istr; i++ )
{
fprintf( stderr, "i=%d\n", i );
(*mseq1pt)[0] = (*seqpt)[i];
(*mseq2pt)[0] = dumseq;
- callrash( i, i, *mseq1pt, *mseq2pt, *equivpt, (*strfiles)[i], (*chainids)[i], (*strfiles)[i], (*chainids)[i], *alloclenpt );
- fprintf( stdout, ">%d_%s-%s\n%s\n", i+1, (*strfiles)[i], (*chainids)[i], (*seqpt)[i] );
+ callrash( i, i, *mseq1pt, *mseq2pt, *equivpt, (*strfiles)[i], (*strfiles)[i], *alloclenpt );
+ fprintf( stdout, ">%d_%s\n%s\n", i+1, (*strfiles)[i], (*seqpt)[i] );
alreadyoutput[i] = 1;
}
}
fftThreshold = NOTSPECIFIED;
RNAppenalty = NOTSPECIFIED;
RNApthr = NOTSPECIFIED;
+ odir = "";
+ pdir = "";
while( --argc > 0 && (*++argv)[0] == '-' )
{
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
+ kimuraR = myatoi( *++argv );
// fprintf( stderr, "kimuraR = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
// fprintf( stderr, "blosum %d\n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = JTT;
fprintf( stderr, "jtt %d\n", pamN );
--argc;
goto nextoption;
case 'm':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = TM;
fprintf( stderr, "TM %d\n", pamN );
fprintf( stderr, "whereispairalign = %s\n", whereispairalign );
--argc;
goto nextoption;
+ case 'o':
+ odir = *++argv;
+ fprintf( stderr, "odir = %s\n", odir );
+ --argc;
+ goto nextoption;
+ case 'p':
+ pdir = *++argv;
+ fprintf( stderr, "pdir = %s\n", pdir );
+ --argc;
+ goto nextoption;
case 't':
- equivthreshold = atoi( *++argv );
+ equivthreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- equivwinsize = atoi( *++argv );
+ equivwinsize = myatoi( *++argv );
--argc;
goto nextoption;
case 'l':
- equivshortestlen = atoi( *++argv );
+ equivshortestlen = myatoi( *++argv );
--argc;
goto nextoption;
case 's':
/* modification end. */
#if 0
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
--argc;
goto nextoption;
case 'Z':
return( val );
}
-static void pairalign( char name[M][B], int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *equiv, double *effarr, char **strfiles, char **chainids, int alloclen )
+static void pairalign( char **name, int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *equiv, double *effarr, char **strfiles, char **chainids, int alloclen )
{
int i, j, ilim;
int clus1, clus2;
int off1, off2;
- float pscore = 0.0; // by D.Mathog
+ double pscore = 0.0; // by D.Mathog
static char *indication1, *indication2;
FILE *hat2p, *hat3p;
static double **distancemtx;
strcpy( aseq[i], seq[i] );
strcpy( aseq[j], seq[j] );
- clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
- clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
+ clus1 = conjuctionfortbfast_old( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
+ clus2 = conjuctionfortbfast_old( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
// fprintf( stderr, "mseq1 = %s\n", mseq1[0] );
// fprintf( stderr, "mseq2 = %s\n", mseq2[0] );
break;
case( 'R' ):
fprintf( stderr, " Calling PDP_ASH.pl %d-%d/%d \r", i+1, j+1, njob );
- pscore = callrash( i, j, mseq1, mseq2, equiv, strfiles[i], chainids[i], strfiles[j], chainids[j], alloclen );
+ pscore = callrash( i, j, mseq1, mseq2, equiv, strfiles[i], strfiles[j], alloclen );
off1 = off2 = 0;
break;
ErrorExit( "ERROR IN SOURCE FILE" );
fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j );
#endif
- putlocalhom_str( mseq1[0], mseq2[0], equiv, scale, localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
+ putlocalhom_str( mseq1[0], mseq2[0], equiv, scale, localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'k' );
#if 1
if( alreadyoutput[i] == 0 )
{
alreadyoutput[i] = 1;
gappick0( seq[i], mseq1[0] );
- fprintf( stdout, ">%d_%s-%s\n%s\n", i+1, strfiles[i], chainids[i], seq[i] );
+ fprintf( stdout, ">%d_%s\n%s\n", i+1, strfiles[i], seq[i] );
strcpy( checkseq[i], seq[i] );
}
else
hat2p = fopen( hat2file, "w" );
if( !hat2p ) ErrorExit( "Cannot open hat2." );
- WriteHat2( hat2p, njob, name, distancemtx );
+ WriteHat2_pointer( hat2p, njob, name, distancemtx );
fclose( hat2p );
fprintf( stderr, "##### writing hat3\n" );
int main( int argc, char *argv[] )
{
static int nlen[M];
- static char name[M][B], **seq;
+ static char **name, **seq;
static char **mseq1, **mseq2;
static char **aseq;
static char **bseq;
nlenmax = 10000; // tekitou
if( alg == 'R' )
- prepareash( infp, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen );
+ prepareash( infp, inputfile, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen );
else if( alg == 'T' )
preparetmalign( infp, &strfiles, &chainids, &seq, &mseq1, &mseq2, &equiv, &alloclen );
fclose( infp );
+ name = AllocateCharMtx( njob, B+1 );
aseq = AllocateCharMtx( njob, nlenmax*2+1 );
bseq = AllocateCharMtx( njob, nlenmax*2+1 );
eff = AllocateDoubleVec( njob );
#define DEBUG 0
#define IODEBUG 0
#define SCOREOUT 0
+#define SHISHAGONYU 0 // for debug
#define NODIST -9999
static int stdout_dist;
static int store_localhom;
static int store_dist;
+static int nadd;
+static int laste;
+static int lastm;
+static int lastsubopt;
+static int lastonce;
+static int usenaivescoreinsteadofalignmentscore;
+static int specifictarget;
+
+// NOTE(review): Lastres is not referenced in the visible part of this file;
+// presumably score / start offsets / aligned strings of one LAST hit -- confirm
+// against its users before relying on that.
+typedef struct _lastres
+{
+ int score;
+ int start1;
+ int start2;
+ char *aln1;
+ char *aln2;
+} Lastres;
+// One ungapped aligned segment of a sequence, as filled in by block2reg().
+typedef struct _reg
+{
+ int start; // position of the first aligned residue (-1 in the terminating entry)
+ int end; // position of the last aligned residue, inclusive (-1 in the terminating entry)
+} Reg;
+
+// One LAST alignment: parallel segment arrays for the two aligned sequences.
+typedef struct _aln
+{
+ int nreg; // number of segments, derived from the comma count of the input line
+ Reg *reg1; // segments on the first sequence; allocated with nreg+1 entries
+ Reg *reg2; // segments on the second sequence; allocated with nreg+1 entries
+} Aln;
+
+// Every LAST alignment stored for one pair of sequences.
+typedef struct _lastresx
+{
+ int score; // score accumulated by the readlastresx*() readers
+ int naln; // number of entries in aln
+ Aln *aln; // array grown with realloc(), one entry per stored alignment
+} Lastresx;
+
+#ifdef enablemultithread
typedef struct _jobtable
{
int i;
int j;
} Jobtable;
-#ifdef enablemultithread
typedef struct _thread_arg
{
int thread_no;
Jobtable *jobpospt;
char **name;
char **seq;
+ char **dseq;
+ int *thereisxineachseq;
LocalHom **localhomtable;
double **distancemtx;
double *selfscore;
char ***bpp;
+ Lastresx **lastresx;
int alloclen;
+ int *targetmap;
pthread_mutex_t *mutex_counter;
pthread_mutex_t *mutex_stdout;
} thread_arg_t;
#endif
+// Argument block handed (through a void *) to the lastal driver threads.
+typedef struct _lastcallthread_arg
+{
+ int nq, nd; // nq: upper bound of the query index; nd: presumably the database sequence count -- confirm
+ char **dseq; // NOTE(review): presumably the database sequences -- confirm against the caller
+ char **qseq; // query sequences, indexed by query number
+ Lastresx **lastresx; // result table filled by the readlastresx*() readers
+#ifdef enablemultithread
+ int thread_no; // index of this worker, used in progress messages
+ int *kshare; // query counter shared by the workers; read and incremented while holding mutex
+ pthread_mutex_t *mutex; // protects *kshare
+#endif
+} lastcallthread_arg_t;
+
static void t2u( char *seq )
{
while( *seq )
}
}
-static float recallpairfoldalign( char **mseq1, char **mseq2, int m1, int m2, int *of1pt, int *of2pt, int alloclen )
+/*
+ * removex: copy the string m into d while dropping every 'X' and 'x'.
+ *
+ * d receives the filtered, NUL-terminated copy; it must have room for
+ * strlen(m)+1 bytes.  Each source character is read before the destination
+ * is written, and d never advances faster than m.
+ *
+ * Returns the number of characters that were dropped.
+ */
+static int removex( char *d, char *m )
+{
+    int ndropped = 0;
+    char c;
+
+    for( ; ( c = *m ) != 0; m++ )
+    {
+        if( c == 'X' || c == 'x' )
+            ndropped++;
+        else
+            *d++ = c;
+    }
+    *d = 0;
+    return( ndropped );
+}
+
+/*
+ * putlocalhom_last: convert the alignments stored in *lastresx into LocalHom
+ * records.
+ *
+ *   s1, s2      the two sequences; region start positions index into them
+ *   localhompt  first LocalHom record; it is filled in place for the first
+ *               region and further records are calloc()ed and chained
+ *               through ->next for every additional region
+ *   lastresx    alignments (naln entries, each with nreg regions)
+ *   korh        NOTE(review): currently ignored -- every record is tagged
+ *               'h'; confirm whether callers expect their value to be stored
+ *
+ * Each region is scored by summing n_dis[][] over its residue pairs.
+ * With divpairscore set, each record gets its own overlap length and score;
+ * otherwise all records of one alignment (and any records following them in
+ * the list) receive that alignment's summed overlap and summed score.
+ *
+ * NOTE(review): the ->next field of the caller-supplied first record is not
+ * written here; it is presumably NULL on entry -- confirm against callers.
+ *
+ * Exits the program if a new record cannot be allocated.
+ */
+static void putlocalhom_last( char *s1, char *s2, LocalHom *localhompt, Lastresx *lastresx, char korh )
+{
+    char *pt1, *pt2;
+    int naln, nreg;
+    int iscore;
+    int isumscore;
+    int sumoverlap;
+    LocalHom *tmppt = localhompt;
+    LocalHom *tmppt2;
+    LocalHom *localhompt0;
+    Reg *rpt1, *rpt2;
+    Aln *apt;
+    int nlocalhom = 0;
+    int len;
+
+    naln = lastresx->naln;
+    apt = lastresx->aln;
+
+    if( naln == 0 ) return;
+    while( naln-- )
+    {
+        rpt1 = apt->reg1;
+        rpt2 = apt->reg2;
+        nreg = apt->nreg;
+        isumscore = 0;
+        sumoverlap = 0;
+        // Reset per alignment so that an alignment without regions leaves the
+        // pointer NULL instead of uninitialized (or pointing at the previous
+        // alignment's records).
+        localhompt0 = NULL;
+        while( nreg-- )
+        {
+            if( nlocalhom++ > 0 )
+            {
+                // Every region after the very first one needs a new record.
+                tmppt->next = (LocalHom *)calloc( 1, sizeof( LocalHom ) );
+                if( tmppt->next == NULL )
+                {
+                    fprintf( stderr, "Cannot allocate LocalHom in putlocalhom_last\n" );
+                    exit( 1 );
+                }
+                tmppt = tmppt->next;
+                tmppt->next = NULL;
+            }
+            tmppt->start1 = rpt1->start;
+            tmppt->start2 = rpt2->start;
+            tmppt->end1 = rpt1->end;
+            tmppt->end2 = rpt2->end;
+            tmppt->korh = 'h';
+            // Remember the record that holds the first region of this alignment.
+            if( rpt1 == apt->reg1 ) localhompt0 = tmppt;
+
+            len = tmppt->end1 - tmppt->start1 + 1;
+
+            pt1 = s1 + tmppt->start1;
+            pt2 = s2 + tmppt->start2;
+            iscore = 0;
+            while( len-- )
+            {
+                // Original note (translated): subtracting "offset" may not be needed.
+                iscore += n_dis[(int)amino_n[(unsigned char)*pt1++]][(int)amino_n[(unsigned char)*pt2++]];
+            }
+
+            if( divpairscore )
+            {
+                tmppt->overlapaa = tmppt->end2-tmppt->start2+1;
+                tmppt->opt = (double)iscore / tmppt->overlapaa * 5.8 / 600;
+            }
+            else
+            {
+                isumscore += iscore;
+                sumoverlap += tmppt->end2-tmppt->start2+1;
+            }
+            rpt1++;
+            rpt2++;
+        }
+
+        if( !divpairscore )
+        {
+            // Walks from this alignment's first record to the end of the list.
+            for( tmppt2=localhompt0; tmppt2; tmppt2=tmppt2->next )
+            {
+                tmppt2->overlapaa = sumoverlap;
+                tmppt2->opt = (double)isumscore * 5.8 / ( 600 * sumoverlap );
+            }
+        }
+        apt++;
+    }
+}
+
+/*
+ * countcomma: return how many ',' characters the NUL-terminated string s
+ * contains.
+ */
+static int countcomma( char *s )
+{
+    int ncomma;
+
+    for( ncomma = 0; *s; s++ )
+    {
+        if( *s == ',' ) ncomma++;
+    }
+    return( ncomma );
+}
+
+/*
+ * recallpairfoldalign: fetch the stored FOLDALIGN result for the pair
+ * (m1, m2) and align the pair.
+ *
+ *   mseq1, mseq2   in: the two sequences; out: the alignment
+ *   m1, m2         sequence numbers, passed on to readpairfoldalign()
+ *   of1pt, of2pt   out: start offsets of the alignment
+ *   alloclen       size of the scratch buffers allocated here and the length
+ *                  limit passed to the aligners
+ *
+ * The file "_foldalignout" is opened on the first call and deliberately
+ * kept open for all later calls (the original note says it is better to
+ * close it only at the very end).
+ *
+ * A sequence alignment is always computed first: G__align11 when
+ * foldalignopt contains "-global", L__align11 otherwise.  If
+ * readpairfoldalign() produced an alignment (aln1 is non-empty) it then
+ * replaces the sequence alignment and its offsets.
+ *
+ * Returns the score of the sequence alignment, also when the FOLDALIGN
+ * alignment is the one handed back.  Exits the program if the file cannot
+ * be opened or the scratch buffers cannot be allocated.
+ */
+static double recallpairfoldalign( char **mseq1, char **mseq2, int m1, int m2, int *of1pt, int *of2pt, int alloclen )
+{
+    static FILE *fp = NULL;
+    double value;
+    char *aln1;
+    char *aln2;
+    int of1tmp, of2tmp;
+
+    if( fp == NULL )
+    {
+        fp = fopen( "_foldalignout", "r" );
+        if( fp == NULL )
+        {
+            fprintf( stderr, "Cannot open _foldalignout\n" );
+            exit( 1 );
+        }
+    }
+
+    aln1 = calloc( alloclen, sizeof( char ) );
+    aln2 = calloc( alloclen, sizeof( char ) );
+    if( aln1 == NULL || aln2 == NULL )
+    {
+        // Both buffers are dereferenced below, so stop here instead of crashing.
+        fprintf( stderr, "Cannot allocate aln1/aln2 in recallpairfoldalign\n" );
+        exit( 1 );
+    }
+
+    readpairfoldalign( fp, *mseq1, *mseq2, aln1, aln2, m1, m2, &of1tmp, &of2tmp, alloclen );
+
+    if( strstr( foldalignopt, "-global") )
+    {
+        fprintf( stderr, "Calling G__align11\n" );
+        value = G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap );
+        *of1pt = 0;
+        *of2pt = 0;
+    }
+    else
+    {
+        fprintf( stderr, "Calling L__align11\n" );
+        value = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, of1pt, of2pt );
+    }
+
+    if( aln1[0] == 0 )
+    {
+        fprintf( stderr, "FOLDALIGN returned no alignment between %d and %d.  Sequence alignment is used instead.\n", m1+1, m2+1 );
+    }
+    else
+    {
+        strcpy( *mseq1, aln1 );
+        strcpy( *mseq2, aln2 );
+        *of1pt = of1tmp;
+        *of2pt = of2tmp;
+    }
+
+    // Historical note (translated): v6.511 scored the final pair with
+    // naivepairscore11() here, because it supported global alignment only.
+
+    free( aln1 );
+    free( aln2 );
+
+    return( value );
+}
+
+/*
+ * block2reg: expand a comma-separated block list into aligned regions.
+ *
+ *   block           points at the delimiter character just in front of the
+ *                   first field (callers pass strrchr( line, '\t' )); NULL
+ *                   yields only the terminating entry
+ *   reg1, reg2      output arrays for the two sequences; one entry is written
+ *                   per match field plus one terminating entry, so the caller
+ *                   must provide that much room
+ *   start1, start2  position of the first aligned residue in each sequence
+ *
+ * A field without ':' is a match length: it produces a region
+ * [pos, pos+len-1] in both sequences.  A field of the form "a:b" is a gap:
+ * it advances the two positions by a+1 and b+1, i.e. from the last residue
+ * of the previous region to the first residue of the next one.
+ * The arrays are terminated by an entry whose start and end are all -1.
+ */
+static void block2reg( char *block, Reg *reg1, Reg *reg2, int start1, int start2 )
+{
+    char *colon, *comma;
+    int pos1 = start1;
+    int pos2 = start2;
+    int len, glen1, glen2;
+
+    while( block )
+    {
+        block++; // step over the tab or comma in front of this field
+        colon = strchr( block, ':' );
+        comma = strchr( block, ',' );
+        // The field is a match length unless a ':' occurs before the next
+        // ','.  The last field (no further ',') is spelled out explicitly
+        // rather than ordering a pointer against NULL, which is undefined.
+        if( colon == NULL || comma == NULL || colon > comma )
+        {
+            len = atoi( block );
+            reg1->start = pos1;
+            reg2->start = pos2;
+            pos1 += len - 1;
+            pos2 += len - 1;
+            reg1->end = pos1;
+            reg2->end = pos2;
+            reg1++;
+            reg2++;
+        }
+        else
+        {
+            // Defaults keep the positions defined if the field is malformed.
+            glen1 = glen2 = 0;
+            sscanf( block, "%d:%d", &glen1, &glen2 );
+            pos1 += glen1 + 1;
+            pos2 += glen2 + 1;
+        }
+        block = comma;
+    }
+    reg1->start = reg1->end = reg2->start = reg2->end = -1;
+}
+
+
+/*
+ * readlastresx_singleq: read lastal output for a single query from fp and
+ * store the alignments in lastresx[name1][name2].
+ *
+ *   fp        lastal output, one alignment per line; lines starting with
+ *             '#' are skipped
+ *   n1        unused
+ *   nameq     number of the query; every accepted line must carry it as its
+ *             second sequence number, otherwise the program exits ("BUG!!!")
+ *   lastresx  result table, indexed [name1][name2] with the two sequence
+ *             numbers read from the line (no range check is done here)
+ *
+ * Each line supplies 11 leading fields (score, then number / start /
+ * alignment size / strand / sequence size for both sequences); the text
+ * after the last tab is handed to block2reg().  Lines that do not supply all
+ * 11 fields are ignored.  When alg is 'R', lines with name2 <= name1 are
+ * ignored.
+ *
+ * Only the first alignment of a pair contributes to the pair's score.
+ * Later alignments of the same pair are stored only when lastsubopt is set.
+ * (A coordinate-overlap test against earlier alignments used to sit here but
+ * was compiled out.)
+ *
+ * NOTE(review): the region count is taken as (number of commas on the
+ * line)/2 + 1, which presumably relies on commas appearing only in the block
+ * list -- confirm against the lastal output format.
+ *
+ * Exits the program on allocation failure or on a line that does not end
+ * in a newline.  A final line without a newline is dropped silently.
+ */
+static void readlastresx_singleq( FILE *fp, int n1, int nameq, Lastresx **lastresx )
+{
+    char *gett;
+    Aln *tmpaln;
+    Lastresx *res;
+    size_t linelen;
+    int prevnaln, nreg;
+    int score, name1, start1, alnSize1, seqSize1;
+    int name2, start2, alnSize2, seqSize2;
+    char strand1, strand2;
+    int includeintoscore;
+
+    gett = calloc( 10000, sizeof( char ) );
+    if( gett == NULL )
+    {
+        fprintf( stderr, "Cannot allocate gett in readlastresx_singleq\n" );
+        exit( 1 );
+    }
+
+    while( 1 )
+    {
+        // Stop on end of file and on read errors alike; a read error would
+        // otherwise leave the previous line in the buffer forever.
+        if( fgets( gett, 9999, fp ) == NULL || feof( fp ) ) break;
+        if( gett[0] == '#' ) continue;
+        linelen = strlen( gett );
+        if( linelen == 0 || gett[linelen-1] != '\n' )
+        {
+            fprintf( stderr, "Too long line?\n" );
+            exit( 1 );
+        }
+
+        // Skip lines that do not carry all fields; using them would index
+        // lastresx with uninitialized or stale numbers.
+        if( sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d",
+                    &score, &name1, &start1, &alnSize1, &strand1, &seqSize1,
+                    &name2, &start2, &alnSize2, &strand2, &seqSize2 ) != 11 ) continue;
+
+        if( alg == 'R' && name2 <= name1 ) continue;
+        if( name2 != nameq )
+        {
+            fprintf( stderr, "BUG!!!\n" );
+            exit( 1 );
+        }
+
+        res = &lastresx[name1][name2];
+        prevnaln = res->naln;
+        includeintoscore = ( prevnaln == 0 );
+        if( !includeintoscore && !lastsubopt )
+            continue;
+
+        // Grow the alignment array by exactly one entry.
+        res->naln = prevnaln + 1;
+        tmpaln = (Aln *)realloc( res->aln, ( prevnaln + 1 ) * sizeof( Aln ) );
+        if( tmpaln == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" );
+            exit( 1 );
+        }
+        res->aln = tmpaln;
+
+        nreg = countcomma( gett )/2 + 1;
+        tmpaln[prevnaln].nreg = nreg;
+
+        // One extra entry for the terminator written by block2reg().
+        if( ( tmpaln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].reg1\n" );
+            exit( 1 );
+        }
+
+        if( ( tmpaln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" );
+            exit( 1 );
+        }
+
+        block2reg( strrchr( gett, '\t' ), tmpaln[prevnaln].reg1, tmpaln[prevnaln].reg2, start1, start2 );
+
+        if( includeintoscore )
+        {
+            if( res->score ) score += penalty;
+            res->score += score;
+        }
+    }
+    free( gett );
+}
+
+#ifdef enablemultithread
+#if 0
+/*
+ * readlastresx_group: read lastal output covering a group of queries from
+ * fp and store the alignments in lastresx[name1][name2].
+ *
+ *   fp        lastal output, one alignment per line; lines starting with
+ *             '#' are skipped
+ *   lastresx  result table, indexed [name1][name2] with the two sequence
+ *             numbers read from the line (no range check is done here)
+ *
+ * Each line supplies 11 leading fields (score, then number / start /
+ * alignment size / strand / sequence size for both sequences); the text
+ * after the last tab is handed to block2reg().  Lines that do not supply all
+ * 11 fields are ignored.  When alg is 'R', lines with name2 <= name1 are
+ * ignored.
+ *
+ * Only the first alignment of a pair contributes to the pair's score.
+ * Later alignments of the same pair are stored only when lastsubopt is set.
+ * (A coordinate-overlap test against earlier alignments used to sit here but
+ * was compiled out.)
+ *
+ * Exits the program on allocation failure or on a line that does not end
+ * in a newline.  A final line without a newline is dropped silently.
+ */
+static void readlastresx_group( FILE *fp, Lastresx **lastresx )
+{
+    char *gett;
+    Aln *tmpaln;
+    Lastresx *res;
+    size_t linelen;
+    int prevnaln, nreg;
+    int score, name1, start1, alnSize1, seqSize1;
+    int name2, start2, alnSize2, seqSize2;
+    char strand1, strand2;
+    int includeintoscore;
+
+    gett = calloc( 10000, sizeof( char ) );
+    if( gett == NULL )
+    {
+        fprintf( stderr, "Cannot allocate gett in readlastresx_group\n" );
+        exit( 1 );
+    }
+
+    while( 1 )
+    {
+        // Stop on end of file and on read errors alike; a read error would
+        // otherwise leave the previous line in the buffer forever.
+        if( fgets( gett, 9999, fp ) == NULL || feof( fp ) ) break;
+        if( gett[0] == '#' ) continue;
+        linelen = strlen( gett );
+        if( linelen == 0 || gett[linelen-1] != '\n' )
+        {
+            fprintf( stderr, "Too long line?\n" );
+            exit( 1 );
+        }
+
+        // Skip lines that do not carry all fields; using them would index
+        // lastresx with uninitialized or stale numbers.
+        if( sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d",
+                    &score, &name1, &start1, &alnSize1, &strand1, &seqSize1,
+                    &name2, &start2, &alnSize2, &strand2, &seqSize2 ) != 11 ) continue;
+
+        if( alg == 'R' && name2 <= name1 ) continue;
+
+        res = &lastresx[name1][name2];
+        prevnaln = res->naln;
+        includeintoscore = ( prevnaln == 0 );
+        if( !includeintoscore && !lastsubopt )
+            continue;
+
+        // Grow the alignment array by exactly one entry.
+        res->naln = prevnaln + 1;
+        tmpaln = (Aln *)realloc( res->aln, ( prevnaln + 1 ) * sizeof( Aln ) );
+        if( tmpaln == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" );
+            exit( 1 );
+        }
+        res->aln = tmpaln;
+
+        nreg = countcomma( gett )/2 + 1;
+        tmpaln[prevnaln].nreg = nreg;
+
+        // One extra entry for the terminator written by block2reg().
+        if( ( tmpaln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].reg1\n" );
+            exit( 1 );
+        }
+
+        if( ( tmpaln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" );
+            exit( 1 );
+        }
+
+        block2reg( strrchr( gett, '\t' ), tmpaln[prevnaln].reg1, tmpaln[prevnaln].reg2, start1, start2 );
+
+        if( includeintoscore )
+        {
+            if( res->score ) score += penalty;
+            res->score += score;
+        }
+    }
+    free( gett );
+}
+#endif
+#endif
+
+/*
+ * readlastresx: read lastal output from fp and store the alignments in
+ * lastresx[name1][name2].
+ *
+ *   fp          lastal output, one alignment per line; lines starting with
+ *               '#' are skipped
+ *   n1, n2      unused
+ *   lastresx    result table, indexed [name1][name2] with the two sequence
+ *               numbers read from the line (no range check is done here)
+ *   seq1, seq2  unused
+ *
+ * Each line supplies 11 leading fields (score, then number / start /
+ * alignment size / strand / sequence size for both sequences); the text
+ * after the last tab is handed to block2reg().  Lines that do not supply all
+ * 11 fields are ignored.  When alg is 'R', lines with name2 <= name1 are
+ * ignored.
+ *
+ * Only the first alignment of a pair contributes to the pair's score.
+ * Later alignments of the same pair are stored only when lastsubopt is set.
+ * (A coordinate-overlap test against earlier alignments used to sit here but
+ * was compiled out.)
+ *
+ * Exits the program on allocation failure or on a line that does not end
+ * in a newline.  A final line without a newline is dropped silently.
+ */
+static void readlastresx( FILE *fp, int n1, int n2, Lastresx **lastresx, char **seq1, char **seq2 )
+{
+    char *gett;
+    Aln *tmpaln;
+    Lastresx *res;
+    size_t linelen;
+    int prevnaln, nreg;
+    int score, name1, start1, alnSize1, seqSize1;
+    int name2, start2, alnSize2, seqSize2;
+    char strand1, strand2;
+    int includeintoscore;
+
+    gett = calloc( 10000, sizeof( char ) );
+    if( gett == NULL )
+    {
+        fprintf( stderr, "Cannot allocate gett in readlastresx\n" );
+        exit( 1 );
+    }
+
+    while( 1 )
+    {
+        // Stop on end of file and on read errors alike; a read error would
+        // otherwise leave the previous line in the buffer forever.
+        if( fgets( gett, 9999, fp ) == NULL || feof( fp ) ) break;
+        if( gett[0] == '#' ) continue;
+        linelen = strlen( gett );
+        if( linelen == 0 || gett[linelen-1] != '\n' )
+        {
+            fprintf( stderr, "Too long line?\n" );
+            exit( 1 );
+        }
+
+        // Skip lines that do not carry all fields; using them would index
+        // lastresx with uninitialized or stale numbers.
+        if( sscanf( gett, "%d %d %d %d %c %d %d %d %d %c %d",
+                    &score, &name1, &start1, &alnSize1, &strand1, &seqSize1,
+                    &name2, &start2, &alnSize2, &strand2, &seqSize2 ) != 11 ) continue;
+
+        if( alg == 'R' && name2 <= name1 ) continue;
+
+        res = &lastresx[name1][name2];
+        prevnaln = res->naln;
+        includeintoscore = ( prevnaln == 0 );
+        if( !includeintoscore && !lastsubopt )
+            continue;
+
+        // Grow the alignment array by exactly one entry.
+        res->naln = prevnaln + 1;
+        tmpaln = (Aln *)realloc( res->aln, ( prevnaln + 1 ) * sizeof( Aln ) );
+        if( tmpaln == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].aln\n" );
+            exit( 1 );
+        }
+        res->aln = tmpaln;
+
+        nreg = countcomma( gett )/2 + 1;
+        tmpaln[prevnaln].nreg = nreg;
+
+        // One extra entry for the terminator written by block2reg().
+        if( ( tmpaln[prevnaln].reg1 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].reg1\n" );
+            exit( 1 );
+        }
+
+        if( ( tmpaln[prevnaln].reg2 = (Reg *)calloc( nreg+1, sizeof( Reg ) ) ) == NULL )
+        {
+            fprintf( stderr, "Cannot reallocate lastresx[][].reg2\n" );
+            exit( 1 );
+        }
+
+        block2reg( strrchr( gett, '\t' ), tmpaln[prevnaln].reg1, tmpaln[prevnaln].reg2, start1, start2 );
+
+        if( includeintoscore )
+        {
+            if( res->score ) score += penalty;
+            res->score += score;
+        }
+    }
+    free( gett );
+}
+
+#ifdef enablemultithread
+#if 0
+// lastcallthread_group: worker that writes a range of query sequences to one
+// file "_q<k>", runs lastal on it against the database "_db<kd>d", and reads
+// the result file "_lastres<k>" with readlastresx_group().
+// arg points to a lastcallthread_arg_t.  Always returns NULL; exits the
+// program if a file cannot be opened or lastal returns non-zero.
+// This function is currently compiled out (#if 0).
+static void *lastcallthread_group( void *arg )
+{
+ lastcallthread_arg_t *targ = (lastcallthread_arg_t *)arg;
+ int k, i;
+ int nq = targ->nq;
+ int nd = targ->nd;
+#ifdef enablemultithread
+ int thread_no = targ->thread_no;
+ int *kshare = targ->kshare;
+#endif
+ Lastresx **lastresx = targ->lastresx;
+ char **dseq = targ->dseq;
+ char **qseq = targ->qseq;
+ char command[5000];
+ FILE *lfp;
+ int msize;
+ int klim;
+ int qstart, qend, shou, amari;
+ char kd[1000];
+
+// With threads, split the nq queries into nthread contiguous ranges:
+// shou is the quotient and amari the remainder of nq / nthread.
+ if( nthread )
+ {
+ shou = nq / nthread;
+ amari = nq - shou * nthread;
+ fprintf( stderr, "shou: %d, amari: %d\n", shou, amari );
+
+ qstart = thread_no * shou;
+// NOTE(review): this test uses thread_no - 1 while the qend test below uses
+// thread_no; check that neighbouring ranges neither overlap nor leave a gap.
+ if( thread_no - 1 < amari ) qstart += thread_no;
+ else qstart += amari;
+
+ qend = qstart + shou - 1;
+ if( thread_no < amari ) qend += 1;
+ fprintf( stderr, "%d: %d-%d\n", thread_no, qstart, qend );
+ }
+ k = -1;
+ while( 1 )
+ {
+ if( nthread )
+ {
+// Each thread runs the loop body once: k becomes thread_no on the first
+// pass, which ends the loop on the second.
+ if( qstart > qend ) break;
+ if( k == thread_no ) break;
+ fprintf( stderr, "\n%d-%d / %d (thread %d) \n", qstart, qend, nq, thread_no );
+ k = thread_no;
+ }
+ else
+ {
+// NOTE(review): qstart and qend are never assigned on this path, yet the
+// query-writing loop below reads them.
+ k++;
+ if( k == nq ) break;
+ fprintf( stderr, "\r%d / %d \r", k, nq );
+ }
+
+ if( alg == 'R' ) // if 'r' -> calllast_fast
+ {
+ fprintf( stderr, "Not supported\n" );
+ exit( 1 );
+ }
+ else // 'r'
+ {
+// Empty suffix: the database name used below is "_dbd".
+ kd[0] = 0;
+ }
+
+// Write the queries qstart..qend in FASTA form, named by their index.
+ sprintf( command, "_q%d", k );
+ lfp = fopen( command, "w" );
+ if( !lfp )
+ {
+ fprintf( stderr, "Cannot open %s", command );
+ exit( 1 );
+ }
+ for( i=qstart; i<=qend; i++ )
+ fprintf( lfp, ">%d\n%s\n", i, qseq[i] );
+ fclose( lfp );
+
+// if( alg == 'R' ) msize = MAX(10,k+nq);
+// else msize = MAX(10,nd+nq);
+// msize becomes lastal's -m argument; it is at least 10.
+ if( alg == 'R' ) msize = MAX(10,k*lastm);
+ else msize = MAX(10,nd*lastm);
+
+// fprintf( stderr, "Calling lastal from lastcallthread, msize = %d, k=%d\n", msize, k );
+// sprintf( command, "grep '>' _db%sd", kd );
+// system( command );
+// The gap penalties are passed to lastal with their sign flipped.
+ sprintf( command, "%s/lastal -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db%sd _q%d > _lastres%d", whereispairalign, msize, laste, -penalty, -penalty_ex, kd, k, k );
+ if( system( command ) ) exit( 1 );
+
+ sprintf( command, "_lastres%d", k );
+ lfp = fopen( command, "r" );
+ if( !lfp )
+ {
+ fprintf( stderr, "Cannot read _lastres%d", k );
+ exit( 1 );
+ }
+// readlastres( lfp, nd, nq, lastres, dseq, qseq );
+// fprintf( stderr, "Reading lastres\n" );
+ readlastresx_group( lfp, lastresx );
+ fclose( lfp );
+ }
+ return( NULL );
+}
+#endif
+#endif
+
+/*
+ * lastcallthread -- worker that runs LAST (lastal) once per query sequence.
+ *
+ * arg is a lastcallthread_arg_t *.  Query indices 0..nq-1 are taken one at a
+ * time: from the shared counter *kshare under targ->mutex when nthread is
+ * non-zero (multithread build only), otherwise sequentially.  For index k:
+ *   - alg == 'R' and k <= njob-nadd: a database "_db<k>d" holding
+ *     dseq[0..k-1] is written and indexed with lastdb;
+ *   - otherwise the database "_dbd" (written by calllast_fast) is used;
+ *   - qseq[k] is written to "_q<k>", lastal writes "_lastres<k>", and that
+ *     file is parsed into lastresx by readlastresx_singleq().
+ *
+ * Returns NULL.  Calls exit( 1 ) when a work file cannot be opened, when a
+ * command line does not fit into the buffer, or when lastal returns non-zero.
+ * A non-zero status from lastdb is only reported as a warning, because a
+ * broken database makes the following lastal call fail anyway.
+ */
+static void *lastcallthread( void *arg )
+{
+	lastcallthread_arg_t *targ = (lastcallthread_arg_t *)arg;
+	int k, i;
+	int nq = targ->nq;
+	int nd = targ->nd;
+#ifdef enablemultithread
+	int thread_no = targ->thread_no;
+	int *kshare = targ->kshare;
+#endif
+	Lastresx **lastresx = targ->lastresx;
+	char **dseq = targ->dseq;
+	char **qseq = targ->qseq;
+	char command[5000];
+	FILE *lfp;
+	int msize;
+	int klim;
+	int len;
+	char kd[1000];
+
+	k = -1;
+	while( 1 )
+	{
+
+#ifdef enablemultithread
+		if( nthread )
+		{
+			// Claim the next query index while holding the lock.
+			pthread_mutex_lock( targ->mutex );
+			k = *kshare;
+			if( k == nq )
+			{
+				pthread_mutex_unlock( targ->mutex );
+				break;
+			}
+			fprintf( stderr, "\r%d / %d (thread %d) \r", k, nq, thread_no );
+			++(*kshare);
+			pthread_mutex_unlock( targ->mutex );
+		}
+		else
+#endif
+		{
+			k++;
+			if( k == nq ) break;
+			fprintf( stderr, "\r%d / %d \r", k, nq );
+		}
+
+		if( alg == 'R' ) // if 'r' -> calllast_fast
+		{
+			klim = MIN( k, njob-nadd );
+//			klim = k; // either choice is fine
+			if( klim == k )
+			{
+				// Per-query database made of the sequences that precede k.
+				snprintf( command, sizeof( command ), "_db%dd", k );
+				lfp = fopen( command, "w" );
+				if( !lfp )
+				{
+					fprintf( stderr, "Cannot open %s\n", command );
+					exit( 1 );
+				}
+				for( i=0; i<klim; i++ ) fprintf( lfp, ">%d\n%s\n", i, dseq[i] );
+				fclose( lfp );
+
+				if( dorp == 'd' )
+					len = snprintf( command, sizeof( command ), "%s/lastdb _db%dd _db%dd", whereispairalign, k, k );
+				else
+					len = snprintf( command, sizeof( command ), "%s/lastdb -p _db%dd _db%dd", whereispairalign, k, k );
+				if( len < 0 || len >= (int)sizeof( command ) )
+				{
+					fprintf( stderr, "lastdb command line is too long\n" );
+					exit( 1 );
+				}
+				if( system( command ) )
+					fprintf( stderr, "Warning: lastdb returned non-zero status for _db%dd\n", k );
+				sprintf( kd, "%d", k );
+			}
+			else // reuse the database made by calllast_fast
+			{
+				kd[0] = 0;
+			}
+		}
+		else // 'r'
+		{
+			kd[0] = 0;
+		}
+
+		snprintf( command, sizeof( command ), "_q%d", k );
+		lfp = fopen( command, "w" );
+		if( !lfp )
+		{
+			fprintf( stderr, "Cannot open %s\n", command );
+			exit( 1 );
+		}
+		fprintf( lfp, ">%d\n%s\n", k, qseq[k] );
+		fclose( lfp );
+
+		// Value handed to lastal -m, never below 10.
+		if( alg == 'R' ) msize = MAX(10,k*lastm);
+		else msize = MAX(10,nd*lastm);
+
+		len = snprintf( command, sizeof( command ), "%s/lastal -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db%sd _q%d > _lastres%d", whereispairalign, msize, laste, -penalty, -penalty_ex, kd, k, k );
+		if( len < 0 || len >= (int)sizeof( command ) )
+		{
+			fprintf( stderr, "lastal command line is too long\n" );
+			exit( 1 );
+		}
+		if( system( command ) )
+		{
+			fprintf( stderr, "LAST aborted\n" );
+			exit( 1 );
+		}
+
+		snprintf( command, sizeof( command ), "_lastres%d", k );
+		lfp = fopen( command, "r" );
+		if( !lfp )
+		{
+			fprintf( stderr, "Cannot read %s\n", command );
+			exit( 1 );
+		}
+		readlastresx_singleq( lfp, nd, k, lastresx );
+		fclose( lfp );
+	}
+	return( NULL );
+}
+
+
+static void calllast_fast( int nd, char **dseq, int nq, char **qseq, Lastresx **lastresx ) /* Per-query LAST driver.
+ * Writes the scoring matrix file "_scoringmatrixforlast" and the database "_dbd",
+ * indexes the database with lastdb, then lets lastcallthread() process the nq
+ * queries: in nthread pthreads when nthread is non-zero (multithread build),
+ * otherwise directly in the calling thread.  Results end up in lastresx.
+ * Calls exit( 1 ) when a work file cannot be opened.
+ */
{
- static FILE *fp = NULL;
- float value;
- char *aln1;
- char *aln2;
- int of1tmp, of2tmp;
+ int i, j;
+ FILE *lfp;
+ char command[1000];
- if( fp == NULL )
+ lfp = fopen( "_scoringmatrixforlast", "w" ); // matrix file later given to lastal via -p
+ if( !lfp )
{
- fp = fopen( "_foldalignout", "r" );
- if( fp == NULL )
+ fprintf( stderr, "Cannot open _scoringmatrixforlast" );
+ exit( 1 );
+ }
+ if( dorp == 'd' ) // 4-letter alphabet (presumably nucleotides -- confirm against the rest of the file)
+ {
+ fprintf( lfp, " " );
+ for( j=0; j<4; j++ ) fprintf( lfp, " %c ", amino[j] ); // header row: column letters
+ fprintf( lfp, "\n" );
+ for( i=0; i<4; i++ )
{
- fprintf( stderr, "Cannot open _foldalignout\n" );
- exit( 1 );
+ fprintf( lfp, "%c ", amino[i] ); // row label followed by that row of n_dis
+ for( j=0; j<4; j++ ) fprintf( lfp, " %d ", n_dis[i][j] );
+ fprintf( lfp, "\n" );
+ }
+ }
+ else // 20-letter alphabet
+ {
+ fprintf( lfp, " " );
+ for( j=0; j<20; j++ ) fprintf( lfp, " %c ", amino[j] );
+ fprintf( lfp, "\n" );
+ for( i=0; i<20; i++ )
+ {
+ fprintf( lfp, "%c ", amino[i] );
+ for( j=0; j<20; j++ ) fprintf( lfp, " %d ", n_dis[i][j] );
+ fprintf( lfp, "\n" );
}
}
+ fclose( lfp );
- aln1 = calloc( alloclen, sizeof( char ) );
- aln2 = calloc( alloclen, sizeof( char ) );
+// if( alg == 'r' ) // if 'R' -> lastcallthread; this part is used only when nadd>0
+ {
+ sprintf( command, "_dbd" );
+ lfp = fopen( command, "w" );
+ if( !lfp )
+ {
+ fprintf( stderr, "Cannot open _dbd" );
+ exit( 1 );
+ }
+ if( alg == 'R' )
+ j = njob-nadd; // for 'R' only the first njob-nadd sequences go into the database
+ else
+ j = nd;
+ for( i=0; i<j; i++ ) fprintf( lfp, ">%d\n%s\n", i, dseq[i] ); // FASTA records named by index
- readpairfoldalign( fp, *mseq1, *mseq2, aln1, aln2, m1, m2, &of1tmp, &of2tmp, alloclen );
+ fclose( lfp );
+ if( dorp == 'd' )
+ sprintf( command, "%s/lastdb _dbd _dbd", whereispairalign );
+ else
+ sprintf( command, "%s/lastdb -p _dbd _dbd", whereispairalign );
+ system( command ); // NOTE(review): exit status of lastdb is not checked; command[] is only 1000 bytes
+ }
- if( strstr( foldalignopt, "-global") )
+#ifdef enablemultithread
+ if( nthread )
{
- fprintf( stderr, "Calling G__align11\n" );
- value = G__align11( mseq1, mseq2, alloclen, outgap, outgap );
- *of1pt = 0;
- *of2pt = 0;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+ lastcallthread_arg_t *targ;
+ int *ksharept;
+ targ = (lastcallthread_arg_t *)calloc( nthread, sizeof( lastcallthread_arg_t ) ); // NOTE(review): calloc results are not checked
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ ksharept = calloc( 1, sizeof(int) );
+ *ksharept = 0; // shared "next query" counter, advanced by the workers under mutex
+ pthread_mutex_init( &mutex, NULL );
+ for( i=0; i<nthread; i++ )
+ {
+ targ[i].thread_no = i;
+ targ[i].kshare = ksharept;
+ targ[i].nq = nq;
+ targ[i].nd = nd;
+ targ[i].dseq = dseq;
+ targ[i].qseq = qseq;
+ targ[i].lastresx = lastresx;
+ targ[i].mutex = &mutex;
+ pthread_create( handle+i, NULL, lastcallthread, (void *)(targ+i) );
+ }
+
+ for( i=0; i<nthread; i++ )
+ {
+ pthread_join( handle[i], NULL ); // wait for every worker before releasing shared state
+ }
+ pthread_mutex_destroy( &mutex );
+ free( handle );
+ free( targ );
+ free( ksharept );
}
else
+#endif
{
- fprintf( stderr, "Calling L__align11\n" );
- value = L__align11( mseq1, mseq2, alloclen, of1pt, of2pt );
+ lastcallthread_arg_t *targ;
+ targ = (lastcallthread_arg_t *)calloc( 1, sizeof( lastcallthread_arg_t ) ); // fields not set below stay zero
+ targ[0].nq = nq;
+ targ[0].nd = nd;
+ targ[0].dseq = dseq;
+ targ[0].qseq = qseq;
+ targ[0].lastresx = lastresx;
+ lastcallthread( targ ); // run the worker in the calling thread
+ free( targ );
}
-// value = (float)naivepairscore11( *mseq1, *mseq2, penalty ); // nennnotame
+}
- if( aln1[0] == 0 )
+static void calllast_once( int nd, char **dseq, int nq, char **qseq, Lastresx **lastresx ) /* Single-run LAST driver.
+ * Writes all nd database sequences to "_db" and all nq queries to "_q", indexes
+ * "_db" with lastdb, writes the scoring matrix, runs lastal exactly once and
+ * parses its output "_lastres" into lastresx with readlastresx().
+ * Calls exit( 1 ) when a work file cannot be opened or lastal returns non-zero.
+ */
+{
+ int i, j;
+ char command[5000];
+ FILE *lfp;
+ int msize;
+ int res;
+
+ fprintf( stderr, "nq=%d\n", nq );
+
+ lfp = fopen( "_db", "w" );
+ if( !lfp )
{
- fprintf( stderr, "FOLDALIGN returned no alignment between %d and %d. Sequence alignment is used instead.\n", m1+1, m2+1 );
+ fprintf( stderr, "Cannot open _db" );
+ exit( 1 );
+ }
+ for( i=0; i<nd; i++ ) fprintf( lfp, ">%d\n%s\n", i, dseq[i] ); // FASTA records named by index
+ fclose( lfp );
+
+ if( dorp == 'd' ) // 4-letter alphabet: lastdb without -p, 4x4 matrix
+ {
+ sprintf( command, "%s/lastdb _db _db", whereispairalign );
+ system( command ); // NOTE(review): exit status of lastdb is not checked
+ lfp = fopen( "_scoringmatrixforlast", "w" );
+ if( !lfp )
+ {
+ fprintf( stderr, "Cannot open _scoringmatrixforlast" );
+ exit( 1 );
+ }
+ fprintf( lfp, " " );
+ for( j=0; j<4; j++ ) fprintf( lfp, " %c ", amino[j] ); // header row: column letters
+ fprintf( lfp, "\n" );
+ for( i=0; i<4; i++ )
+ {
+ fprintf( lfp, "%c ", amino[i] ); // row label followed by that row of n_dis
+ for( j=0; j<4; j++ ) fprintf( lfp, " %d ", n_dis[i][j] );
+ fprintf( lfp, "\n" );
+ }
+ fclose( lfp );
+#if 0 // disabled: would derive laste from the 4th line of lastex output
+ sprintf( command, "lastex -s 2 -a %d -b %d -p _scoringmatrixforlast -E 10000 _db.prj _db.prj > _lastex", -penalty, -penalty_ex );
+ system( command );
+ lfp = fopen( "_lastex", "r" );
+ fgets( command, 4999, lfp );
+ fgets( command, 4999, lfp );
+ fgets( command, 4999, lfp );
+ fgets( command, 4999, lfp );
+ laste = atoi( command );
+ fclose( lfp );
+ fprintf( stderr, "laste = %d\n", laste );
+ sleep( 10 );
+#else
+// laste = 5000;
+#endif
}
else
{
- strcpy( *mseq1, aln1 );
- strcpy( *mseq2, aln2 );
- *of1pt = of1tmp;
- *of2pt = of2tmp;
+ sprintf( command, "%s/lastdb -p _db _db", whereispairalign ); // 20-letter alphabet: lastdb with -p, 20x20 matrix
+ system( command ); // NOTE(review): exit status of lastdb is not checked
+ lfp = fopen( "_scoringmatrixforlast", "w" );
+ if( !lfp )
+ {
+ fprintf( stderr, "Cannot open _scoringmatrixforlast" );
+ exit( 1 );
+ }
+ fprintf( lfp, " " );
+ for( j=0; j<20; j++ ) fprintf( lfp, " %c ", amino[j] );
+ fprintf( lfp, "\n" );
+ for( i=0; i<20; i++ )
+ {
+ fprintf( lfp, "%c ", amino[i] );
+ for( j=0; j<20; j++ ) fprintf( lfp, " %d ", n_dis[i][j] );
+ fprintf( lfp, "\n" );
+ }
+ fclose( lfp );
+// fprintf( stderr, "Not written yet\n" );
}
-// value = naivepairscore11( *mseq1, *mseq2, penalty ); // v6.511 ha kore wo tsukau, global nomi dakara.
-
-// fclose( fp ); // saigo dake yatta houga yoi.
-
-// fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
-// fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
+ lfp = fopen( "_q", "w" ); // all queries go into one file
+ if( !lfp )
+ {
+ fprintf( stderr, "Cannot open _q" );
+ exit( 1 );
+ }
+ for( i=0; i<nq; i++ )
+ {
+ fprintf( lfp, ">%d\n%s\n", i, qseq[i] );
+ }
+ fclose( lfp );
+ msize = MAX(10,nd*lastm); // value passed to lastal -m, never below 10
- free( aln1 );
- free( aln2 );
+// fprintf( stderr, "Calling lastal from calllast_once, msize=%d\n", msize );
+ sprintf( command, "%s/lastal -v -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", whereispairalign, msize, laste, -penalty, -penalty_ex );
+// sprintf( command, "lastal -v -m %d -e %d -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", 1, laste, -penalty, -penalty_ex );
+// sprintf( command, "lastal -v -e 40 -f 0 -s 1 -p _scoringmatrixforlast -a %d -b %d _db _q > _lastres", -penalty, -penalty_ex );
+ res = system( command );
+ if( res ) // any non-zero status from the shell command is fatal
+ {
+ fprintf( stderr, "LAST aborted\n" );
+ exit( 1 );
+ }
- return( value );
+ lfp = fopen( "_lastres", "r" );
+ if( !lfp )
+ {
+ fprintf( stderr, "Cannot read _lastres" );
+ exit( 1 );
+ }
+// readlastres( lfp, nd, nq, lastres, dseq, qseq );
+ fprintf( stderr, "Reading lastres\n" );
+ readlastresx( lfp, nd, nq, lastresx, dseq, qseq );
+ fclose( lfp );
}
static void callfoldalign( int nseq, char **mseq )
}
}
-static float recalllara( char **mseq1, char **mseq2, int alloclen )
+static double recalllara( char **mseq1, char **mseq2, int alloclen )
{
static FILE *fp = NULL;
static char *ungap1;
static char *ori2;
// int res;
static char com[10000];
- float value;
+ double value;
if( fp == NULL )
exit( 1 );
}
- value = (float)naivepairscore11( *mseq1, *mseq2, penalty );
+ value = (double)naivepairscore11( *mseq1, *mseq2, penalty );
// fclose( fp ); // saigo dake yatta houga yoi.
}
-static float callmxscarna_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j )
+static double calldafs_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j )
{
FILE *fp;
int res;
char *com;
- float value;
+ double value;
char *dirname;
t2u( *mseq1 );
t2u( *mseq2 );
- sprintf( com, "%s/_mxscarnainorg", dirname );
+ sprintf( com, "%s/_dafsinorg", dirname );
fp = fopen( com, "w" );
if( !fp )
{
- fprintf( stderr, "Cannot open %s/_mxscarnainorg\n", dirname );
+ fprintf( stderr, "Cannot open %s/_dafsinorg\n", dirname );
exit( 1 );
}
fprintf( fp, ">1\n" );
write1seq( fp, *mseq2 );
fclose( fp );
- sprintf( com, "tr -d '\\r' < %s/_mxscarnainorg > %s/_mxscarnain", dirname, dirname );
+ sprintf( com, "tr -d '\\r' < %s/_dafsinorg > %s/_dafsin", dirname, dirname );
system( com ); // for cygwin, wakaran
-#if 0
- sprintf( com, "cd %s; %s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum", dirname, whereispairalign );
-#else
- sprintf( com, "_mxscarnash%s", dirname );
+ sprintf( com, "_dafssh%s", dirname );
fp = fopen( com, "w" );
fprintf( fp, "cd %s\n", dirname );
- fprintf( fp, "%s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum\n", whereispairalign );
+ fprintf( fp, "%s/dafs --mafft-in _bpp _dafsin > _dafsout 2>_dum\n", whereispairalign );
fprintf( fp, "exit $tatus\n" );
fclose( fp );
- sprintf( com, "tr -d '\\r' < _mxscarnash%s > _mxscarnash%s.unix", dirname, dirname );
+ sprintf( com, "tr -d '\\r' < _dafssh%s > _dafssh%s.unix", dirname, dirname );
system( com ); // for cygwin, wakaran
- sprintf( com, "sh _mxscarnash%s.unix 2>_dum%s", dirname, dirname );
-#endif
+ sprintf( com, "sh _dafssh%s.unix 2>_dum%s", dirname, dirname );
res = system( com );
if( res )
{
- fprintf( stderr, "Error in mxscarna\n" );
+ fprintf( stderr, "Error in dafs\n" );
exit( 1 );
}
- sprintf( com, "%s/_mxscarnaout", dirname );
+ sprintf( com, "%s/_dafsout", dirname );
fp = fopen( com, "r" );
if( !fp )
{
- fprintf( stderr, "Cannot open %s/_mxscarnaout\n", dirname );
+ fprintf( stderr, "Cannot open %s/_dafsout\n", dirname );
exit( 1 );
}
+ myfgets( com, 999, fp ); // nagai kanousei ga arunode
+ fgets( com, 999, fp );
+ myfgets( com, 999, fp ); // nagai kanousei ga arunode
fgets( com, 999, fp );
load1SeqWithoutName_new( fp, *mseq1 );
fgets( com, 999, fp );
// fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
// fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
- value = (float)naivepairscore11( *mseq1, *mseq2, penalty );
+ value = (double)naivepairscore11( *mseq1, *mseq2, penalty );
#if 0
sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname );
return( value );
}
-#if 0
-static float callmxscarna_slow( char **mseq1, char **mseq2, int alloclen )
+static double callmxscarna_giving_bpp( char **mseq1, char **mseq2, char **bpp1, char **bpp2, int alloclen, int i, int j )
{
FILE *fp;
int res;
- static char com[10000];
- float value;
+ char *com;
+ double value;
+ char *dirname;
+
+
+ dirname = calloc( 100, sizeof( char ) );
+ com = calloc( 1000, sizeof( char ) );
+ sprintf( dirname, "_%d-%d", i, j );
+ sprintf( com, "rm -rf %s", dirname );
+ system( com );
+ sprintf( com, "mkdir %s", dirname );
+ system( com );
+
+
+ sprintf( com, "%s/_bpporg", dirname );
+ fp = fopen( com, "w" );
+ if( !fp )
+ {
+ fprintf( stderr, "Cannot write to %s/_bpporg\n", dirname );
+ exit( 1 );
+ }
+ fprintf( fp, ">a\n" );
+ while( *bpp1 )
+ fprintf( fp, "%s", *bpp1++ );
+
+ fprintf( fp, ">b\n" );
+ while( *bpp2 )
+ fprintf( fp, "%s", *bpp2++ );
+ fclose( fp );
+ sprintf( com, "tr -d '\\r' < %s/_bpporg > %s/_bpp", dirname, dirname );
+ system( com ); // for cygwin, wakaran
t2u( *mseq1 );
t2u( *mseq2 );
- fp = fopen( "_mxscarnain", "w" );
+
+ sprintf( com, "%s/_mxscarnainorg", dirname );
+ fp = fopen( com, "w" );
if( !fp )
{
- fprintf( stderr, "Cannot open _mxscarnain\n" );
+ fprintf( stderr, "Cannot open %s/_mxscarnainorg\n", dirname );
exit( 1 );
}
fprintf( fp, ">1\n" );
- fprintf( fp, "%s\n", *mseq1 );
+// fprintf( fp, "%s\n", *mseq1 );
+ write1seq( fp, *mseq1 );
fprintf( fp, ">2\n" );
- fprintf( fp, "%s\n", *mseq2 );
+// fprintf( fp, "%s\n", *mseq2 );
+ write1seq( fp, *mseq2 );
+ fclose( fp );
+
+ sprintf( com, "tr -d '\\r' < %s/_mxscarnainorg > %s/_mxscarnain", dirname, dirname );
+ system( com ); // for cygwin, wakaran
+
+#if 0
+ sprintf( com, "cd %s; %s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum", dirname, whereispairalign );
+#else
+ sprintf( com, "_mxscarnash%s", dirname );
+ fp = fopen( com, "w" );
+ fprintf( fp, "cd %s\n", dirname );
+ fprintf( fp, "%s/mxscarnamod -readbpp _mxscarnain > _mxscarnaout 2>_dum\n", whereispairalign );
+ fprintf( fp, "exit $tatus\n" );
fclose( fp );
+//sleep( 10000 );
+
+ sprintf( com, "tr -d '\\r' < _mxscarnash%s > _mxscarnash%s.unix", dirname, dirname );
+ system( com ); // for cygwin, wakaran
- sprintf( com, "env PATH=%s mxscarnamod _mxscarnain > _mxscarnaout 2>_dum", whereispairalign );
+ sprintf( com, "sh _mxscarnash%s.unix 2>_dum%s", dirname, dirname );
+#endif
res = system( com );
if( res )
{
exit( 1 );
}
- fp = fopen( "_mxscarnaout", "r" );
+ sprintf( com, "%s/_mxscarnaout", dirname );
+
+ fp = fopen( com, "r" );
if( !fp )
{
- fprintf( stderr, "Cannot open _mxscarnaout\n" );
+ fprintf( stderr, "Cannot open %s/_mxscarnaout\n", dirname );
exit( 1 );
}
// fprintf( stderr, "*mseq1 = %s\n", *mseq1 );
// fprintf( stderr, "*mseq2 = %s\n", *mseq2 );
- value = (float)naivepairscore11( *mseq1, *mseq2, penalty );
+ value = (double)naivepairscore11( *mseq1, *mseq2, penalty );
+
+#if 0
+ sprintf( com, "rm -rf %s > /dev/null 2>&1", dirname );
+ if( system( com ) )
+ {
+ fprintf( stderr, "retrying to rmdir\n" );
+ usleep( 2000 );
+ system( com );
+ }
+#endif
+
+ free( dirname );
+ free( com );
+
return( value );
}
-#endif
static void readhat4( FILE *fp, char ***bpp )
{
}
fgets( oneline, 999, fp );
// fprintf( stderr, "oneline=%s\n", oneline );
-// sscanf( oneline, "%d %d %f", &posi, &posj, &prob );
+// sscanf( oneline, "%d %d %lf", &posi, &posj, &prob );
// fprintf( stderr, "%d %d -> %f\n", posi, posj, prob );
*bpp = realloc( *bpp, (bppsize+2) * sizeof( char * ) );
(*bpp)[bppsize] = calloc( 100, sizeof( char ) );
fclose( fp );
}
-void arguments( int argc, char *argv[] )
+static void arguments( int argc, char *argv[] )
{
int c;
nthread = 1;
+ laste = 5000;
+ lastm = 3;
+ nadd = 0;
+ lastsubopt = 0;
+ lastonce = 0;
foldalignopt[0] = 0;
laraparams = NULL;
inputfile = NULL;
stdout_dist = 0;
store_dist = 1;
store_localhom = 1;
- dorp = NOTSPECIFIED;
+// dorp = NOTSPECIFIED;
ppenalty = NOTSPECIFIED;
ppenalty_OP = NOTSPECIFIED;
ppenalty_ex = NOTSPECIFIED;
ppenalty_EX = NOTSPECIFIED;
+ penalty_shift_factor = 1000.0;
poffset = NOTSPECIFIED;
kimuraR = NOTSPECIFIED;
pamN = NOTSPECIFIED;
fftThreshold = NOTSPECIFIED;
RNAppenalty = NOTSPECIFIED;
RNApthr = NOTSPECIFIED;
-
+ specificityconsideration = 0.0;
+ usenaivescoreinsteadofalignmentscore = 0;
+ specifictarget = 0;
+ nwildcard = 0;
+
+// reporterr( "argc=%d\n", argc );
+// reporterr( "*argv=%s\n", *argv );
+// reporterr( "(*argv)[0]=%c\n", (*argv)[0] );
while( --argc > 0 && (*++argv)[0] == '-' )
{
+// reporterr( "(*argv)[0] in while loop = %s\n", (*argv) );
while ( ( c = *++argv[0] ) )
{
switch( c )
{
case 'i':
inputfile = *++argv;
- fprintf( stderr, "inputfile = %s\n", inputfile );
+// fprintf( stderr, "inputfile = %s\n", inputfile );
--argc;
goto nextoption;
case 'f':
ppenalty_EX = (int)( atof( *++argv ) * 1000 - 0.5 );
--argc;
goto nextoption;
+ case 'Q':
+ penalty_shift_factor = atof( *++argv );
+ --argc;
+ goto nextoption;
case 'h':
poffset = (int)( atof( *++argv ) * 1000 - 0.5 );
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
+ kimuraR = myatoi( *++argv );
// fprintf( stderr, "kimuraR = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
// fprintf( stderr, "blosum %d\n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = JTT;
- fprintf( stderr, "jtt %d\n", pamN );
+// fprintf( stderr, "jtt %d\n", pamN );
--argc;
goto nextoption;
case 'm':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = TM;
- fprintf( stderr, "TM %d\n", pamN );
+// fprintf( stderr, "TM %d\n", pamN );
--argc;
goto nextoption;
+#if 0
case 'l':
ppslocal = (int)( atof( *++argv ) * 1000 + 0.5 );
pslocal = (int)( 600.0 / 1000.0 * ppslocal + 0.5);
// fprintf( stderr, "pslocal = %d\n", pslocal );
--argc;
goto nextoption;
+#else
+ case 'l':
+ if( atof( *++argv ) < 0.00001 ) store_localhom = 0;
+ --argc;
+ goto nextoption;
+#endif
case 'd':
whereispairalign = *++argv;
fprintf( stderr, "whereispairalign = %s\n", whereispairalign );
--argc;
goto nextoption;
case 'C':
- nthread = atoi( *++argv );
- fprintf( stderr, "nthread = %d\n", nthread );
+ nthread = myatoi( *++argv );
+// fprintf( stderr, "nthread = %d\n", nthread );
+ --argc;
+#ifndef enablemultithread
+ nthread = 0;
+#endif
+ goto nextoption;
+ case 'I':
+ nadd = myatoi( *++argv );
+// fprintf( stderr, "nadd = %d\n", nadd );
+ --argc;
+ goto nextoption;
+ case 'w':
+ lastm = myatoi( *++argv );
+ fprintf( stderr, "lastm = %d\n", lastm );
+ --argc;
+ goto nextoption;
+ case 'e':
+ laste = myatoi( *++argv );
+ fprintf( stderr, "laste = %d\n", laste );
+ --argc;
+ goto nextoption;
+ case 'u':
+ specificityconsideration = (double)myatof( *++argv );
+// fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration );
--argc;
goto nextoption;
+ case 'K': // Hontou ha iranai. disttbfast.c, tbfast.c to awaserutame.
+ break;
case 'c':
stdout_dist = 1;
break;
fmodel = 1;
break;
#endif
+#if 0
case 'r':
fmodel = -1;
break;
+#endif
case 'D':
dorp = 'd';
break;
case 'P':
dorp = 'p';
break;
+#if 0
case 'e':
fftscore = 0;
break;
-#if 0
case 'O':
fftNoAnchStop = 1;
break;
#endif
+#if 0
case 'Q':
calledByXced = 1;
break;
-#if 0
case 'x':
disp = 1;
break;
case 'a':
alg = 'a';
break;
-#endif
case 'S':
alg = 'S';
break;
+#endif
+ case 'U':
+ lastonce = 1;
+ break;
+ case 'S':
+ lastsubopt = 1;
+ break;
case 't':
alg = 't';
store_localhom = 0;
case 'L':
alg = 'L';
break;
+ case 'Y':
+ alg = 'Y'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> L;
+ break;
+ case 'Z':
+ usenaivescoreinsteadofalignmentscore = 1;
+ break;
case 's':
alg = 's';
break;
+ case 'G':
+ alg = 'G';
+ break;
case 'B':
alg = 'B';
break;
case 'R':
alg = 'R';
break;
+ case 'r':
+ alg = 'r'; // nadd>0 no toki nomi. moto no hairetsu to atarashii hairetsuno alignmnt -> R, last
+ break;
case 'N':
alg = 'N';
break;
- case 'K':
- alg = 'K';
- break;
case 'A':
alg = 'A';
break;
case 'y':
divpairscore = 1;
break;
+ case '=':
+ specifictarget = 1;
+ break;
+ case ':':
+ nwildcard = 1;
+ break;
/* Modified 01/08/27, default: user tree */
case 'J':
tbutree = 0;
fprintf( stderr, "foldalignopt = %s\n", foldalignopt );
--argc;
goto nextoption;
+#if 0
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
--argc;
goto nextoption;
case 'Z':
checkC = 1;
break;
+#endif
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
}
if( argc != 0 )
{
- fprintf( stderr, "options: Check source file !\n" );
+ fprintf( stderr, "pairlocalalign options: Check source file !\n" );
exit( 1 );
}
if( tbitr == 1 && outgap == 0 )
return( val );
}
+/*
+ * score2dist -- turn a pairwise score into a distance.
+ *
+ * The smaller of the two self-scores is the normaliser:
+ *   normaliser == 0.0      -> 2.0
+ *   normaliser <  pscore   -> 0.0
+ *   otherwise              -> ( 1.0 - pscore / normaliser ) * 2.0
+ */
+static double score2dist( double pscore, double selfscore1, double selfscore2)
+{
+	double denom = MIN( selfscore1, selfscore2 );
+
+	if( denom == 0.0 ) return( 2.0 );
+	if( denom < pscore ) return( 0.0 ); // the original author marked this case as problematic
+	return( ( 1.0 - pscore / denom ) * 2.0 );
+}
+
#if enablemultithread
-static void *athread( void *arg )
+static void *athread( void *arg ) // alg='R', alg='r' -> tsukawarenai.
{
thread_arg_t *targ = (thread_arg_t *)arg;
- int i, j;
- int clus1, clus2;
- int off1, off2;
+ int i, ilim, j, jst;
+ int off1, off2, dum1, dum2, thereisx;
int intdum;
- double bunbo;
- float pscore = 0.0; // by D.Mathog
- double *effarr;
+ double pscore = 0.0; // by D.Mathog
double *effarr1;
double *effarr2;
- char **pair;
- char *indication1, *indication2;
- char **mseq1, **mseq2;
+ char **mseq1, **mseq2, **distseq1, **distseq2, **dumseq1, **dumseq2;
char **aseq;
+ double **dynamicmtx = NULL;
+ double dist;
+ double scoreoffset;
// thread_arg
int thread_no = targ->thread_no;
Jobtable *jobpospt = targ->jobpospt;
char **name = targ->name;
char **seq = targ->seq;
+ char **dseq = targ->dseq;
+ int *thereisxineachseq = targ->thereisxineachseq;
LocalHom **localhomtable = targ->localhomtable;
double **distancemtx = targ->distancemtx;
double *selfscore = targ->selfscore;
char ***bpp = targ->bpp;
+ Lastresx **lastresx = targ->lastresx;
int alloclen = targ->alloclen;
+ int *targetmap = targ->targetmap;
// fprintf( stderr, "thread %d start!\n", thread_no );
- effarr = AllocateDoubleVec( njob );
- for( i=0; i<njob; i++ ) effarr[i] = 1.0;
- effarr1 = AllocateDoubleVec( njob );
- effarr2 = AllocateDoubleVec( njob );
- indication1 = AllocateCharVec( 150 );
- indication2 = AllocateCharVec( 150 );
- pair = AllocateCharMtx( njob, njob );
- for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) pair[i][j] = 0;
- for( i=0; i<njob; i++ ) pair[i][i] = 1;
+ effarr1 = AllocateDoubleVec( 1 );
+ effarr2 = AllocateDoubleVec( 1 );
mseq1 = AllocateCharMtx( njob, 0 );
mseq2 = AllocateCharMtx( njob, 0 );
- aseq = AllocateCharMtx( njob, alloclen+10 );
+ if( alg == 'N' )
+ {
+ dumseq1 = AllocateCharMtx( 1, alloclen+10 );
+ dumseq2 = AllocateCharMtx( 1, alloclen+10 );
+ }
+ distseq1 = AllocateCharMtx( 1, 0 );
+ distseq2 = AllocateCharMtx( 1, 0 );
+ aseq = AllocateCharMtx( 2, alloclen+10 );
+ if( specificityconsideration > 0.0 ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
+
+ if( alg == 'Y' || alg == 'r' ) ilim = njob - nadd;
+ else ilim = njob - 1;
+
while( 1 )
{
if( j == njob )
{
i++;
- j = i + 1;
- if( i == njob-1 )
+
+ if( alg == 'Y' || alg == 'r' ) jst = njob - nadd;
+ else jst = i + 1;
+ j = jst;
+
+ if( i == ilim )
{
// fprintf( stderr, "thread %d end!\n", thread_no );
pthread_mutex_unlock( targ->mutex_counter );
commonIP = NULL;
if( commonJP ) FreeIntMtx( commonJP );
commonJP = NULL;
- Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ G__align11( NULL, NULL, NULL, 0, 0, 0 ); // 20130603
G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 );
- L__align11( NULL, NULL, 0, NULL, NULL );
- genL__align11( NULL, NULL, 0, NULL, NULL );
- free( effarr );
+ L__align11( NULL, 0.0, NULL, NULL, 0, NULL, NULL );
+ L__align11_noalign( NULL, NULL, NULL );
+ genL__align11( NULL, NULL, NULL, 0, NULL, NULL );
free( effarr1 );
free( effarr2 );
- free( indication1 );
- free( indication2 );
- FreeCharMtx( pair );
free( mseq1 );
free( mseq2 );
+ if( alg == 'N' )
+ {
+ FreeCharMtx( dumseq1 );
+ FreeCharMtx( dumseq2 );
+ }
+ free( distseq1 );
+ free( distseq2 );
FreeCharMtx( aseq );
+ if( dynamicmtx ) FreeDoubleMtx( dynamicmtx );
return( NULL );
}
}
pthread_mutex_unlock( targ->mutex_counter );
- if( j == i+1 || j % 100 == 0 )
+// if( j == i+1 || j % 100 == 0 )
+ if( j == i+1 && i % 10 == 0 )
{
- fprintf( stderr, "% 5d / %d (by thread %3d) \r", i, njob, thread_no );
+ fprintf( stderr, "% 5d / %d (by thread %3d) \r", i, njob-nadd, thread_no );
// fprintf( stderr, "% 5d - %5d / %d (thread %d)\n", i, j, njob, thread_no );
}
if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 )
{
- if( store_dist ) distancemtx[i][j] = 2.0;
+ if( store_dist )
+ {
+ if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = 3.0;
+ else distancemtx[i][j-i] = 3.0;
+ }
if( stdout_dist)
{
pthread_mutex_lock( targ->mutex_stdout );
- fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, 2.0 );
+ fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, 3.0 );
pthread_mutex_unlock( targ->mutex_stdout );
}
continue;
}
- strcpy( aseq[i], seq[i] );
- strcpy( aseq[j], seq[j] );
- clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
- clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
+ strcpy( aseq[0], seq[i] );
+ strcpy( aseq[1], seq[j] );
+// clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
+// clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
+// fprintf( stderr, "Skipping conjuction..\n" );
+
+ effarr1[0] = 1.0;
+ effarr2[0] = 1.0;
+ mseq1[0] = aseq[0];
+ mseq2[0] = aseq[1];
+
+ thereisx = thereisxineachseq[i] + thereisxineachseq[j];
+// strcpy( distseq1[0], dseq[i] ); // nen no tame
+// strcpy( distseq2[0], dseq[j] ); // nen no tame
+ distseq1[0] = dseq[i];
+ distseq2[0] = dseq[j];
+
// fprintf( stderr, "mseq1 = %s\n", mseq1[0] );
// fprintf( stderr, "mseq2 = %s\n", mseq2[0] );
if( use_fft )
{
- pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum, NULL, 0, NULL );
+ pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, 1, 1, alloclen, &intdum, NULL, 0, NULL );
// fprintf( stderr, "pscore (fft) = %f\n", pscore );
off1 = off2 = 0;
}
{
switch( alg )
{
+ case( 'R' ):
+ if( nadd && njob-nadd <= j && njob-nadd <= i ) // new sequence doushi ha mushi
+ pscore = 0.0;
+ else
+ pscore = (double)lastresx[i][j].score; // all pair
+ break;
+ case( 'r' ):
+ if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) )
+ pscore = (double)lastresx[i][j-(njob-nadd)].score;
+ else
+ pscore = 0.0;
+ break;
case( 'L' ):
- pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen );
- L__align11( mseq1, mseq2, alloclen, &off1, &off2 );
+ if( nadd && njob-nadd <= j && njob-nadd <= i ) // new sequence doushi ha mushi
+ pscore = 0.0;
+ else
+ {
+ if( usenaivescoreinsteadofalignmentscore )
+ {
+ L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 );
+ pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki
+ }
+ else
+ {
+// if( store_localhom )
+ if( store_localhom && ( targetmap[i] != -1 || targetmap[j] != -1 ) )
+ {
+ pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 );
+ if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // uwagaki
+#if 1
+ if( specificityconsideration > 0.0 )
+ {
+ dist = score2dist( pscore, selfscore[i], selfscore[j] );
+ if( ( scoreoffset = dist2offset( dist ) ) < 0.0 )
+ {
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru.
+ strcpy( mseq1[0], seq[i] );
+ strcpy( mseq2[0], seq[j] );
+ L__align11( dynamicmtx, scoreoffset, mseq1, mseq2, alloclen, &off1, &off2 );
+ }
+ }
+#endif
+ }
+ else
+ pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 );
+ }
+ }
+// pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // CHUUI!!!!!!
+ break;
+ case( 'Y' ):
+ if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) ) // new sequence vs exiting sequence nomi keisan
+ {
+ if( usenaivescoreinsteadofalignmentscore )
+ {
+ L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 );
+ pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki
+ }
+ else
+ {
+ if( store_localhom )
+ {
+ pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 );
+ if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // uwagaki
+ }
+ else
+ pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 );
+ }
+ }
+ else
+ pscore = 0.0;
break;
case( 'A' ):
- pscore = G__align11( mseq1, mseq2, alloclen, outgap, outgap );
+ if( usenaivescoreinsteadofalignmentscore )
+ {
+ G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap );
+ pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki
+ }
+ else
+ {
+// if( store_localhom )
+ if( store_localhom && ( targetmap[i] != -1 || targetmap[j] != -1 ) )
+ {
+ pscore = G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap );
+ if( thereisx ) pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki
+#if 1
+ if( specificityconsideration > 0.0 )
+ {
+ dist = score2dist( pscore, selfscore[i], selfscore[j] );
+// dist = score2dist( L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ), selfscore[i], selfscore[j] ); // 2014/Feb/20
+ if( dist2offset( dist ) < 0.0 )
+ {
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru.
+ strcpy( mseq1[0], seq[i] );
+ strcpy( mseq2[0], seq[j] );
+ G__align11( dynamicmtx, mseq1, mseq2, alloclen, outgap, outgap );
+
+ }
+// pscore = (double)naivepairscore11( *mseq1, *mseq2, 0.0 );
+ }
+#endif
+ }
+ else
+ pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki
+ }
off1 = off2 = 0;
break;
case( 'N' ):
- pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen );
- genL__align11( mseq1, mseq2, alloclen, &off1, &off2 );
+ if( usenaivescoreinsteadofalignmentscore )
+ {
+ genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 );
+ pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki
+ }
+ else
+ {
+// pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen );
+ pscore = genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 );
+ if( thereisx )
+ {
+ strcpy( dumseq1[0], distseq1[0] );
+ strcpy( dumseq2[0], distseq2[0] );
+ pscore = genL__align11( n_dis_consweight_multi, dumseq1, dumseq2, alloclen, &dum1, &dum2 ); // uwagaki
+ }
+#if 1
+ if( specificityconsideration > 0.0 )
+ {
+ dist = score2dist( pscore, selfscore[i], selfscore[j] );
+ if( dist2offset( dist ) < 0.0 )
+ {
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru.
+ strcpy( mseq1[0], seq[i] );
+ strcpy( mseq2[0], seq[j] );
+ genL__align11( dynamicmtx, mseq1, mseq2, alloclen, &off1, &off2 );
+ }
+ }
+#endif
+ }
break;
case( 't' ):
- pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen );
off1 = off2 = 0;
+// pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen );
+ pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // tsuneni distseq shiyou
break;
case( 's' ):
pscore = callmxscarna_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j );
off1 = off2 = 0;
break;
+ case( 'G' ):
+ pscore = calldafs_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j );
+ off1 = off2 = 0;
+ break;
#if 0
case( 'a' ):
pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
off1 = off2 = 0;
break;
case( 'K' ):
- pscore = genG__align11( mseq1, mseq2, alloclen );
+ pscore = genG__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen );
off1 = off2 = 0;
break;
case( 'H' ):
#if SCOREOUT
fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j );
#endif
- if( !store_localhom )
- ;
- else if( alg == 'H' )
- putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
- else if( alg != 'S' && alg != 'V' )
+// if( pscore > 0.0 && ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) x-ins-i de seido teika
+ if( ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) )
{
- putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
+ if( !store_localhom )
+ ;
+ else if( specifictarget && targetmap[i] == -1 && targetmap[j] == -1)
+ ;
+ else if( alg == 'R' )
+ putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j, lastresx[i]+j, 'h' );
+ else if( alg == 'r' )
+ putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), lastresx[i]+j-(njob-nadd), 'h' );// ?????
+ else if( alg == 'H' )
+ putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+ else if( alg == 'Y' )
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+ else if( !specifictarget && alg != 'S' && alg != 'V' )
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-i, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+ else
+// putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
+ {
+ if( targetmap[i] != -1 && targetmap[j] != -1 )
+ {
+ putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' );
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' ); // sukoshi muda.
+ }
+ else if( targetmap[j] != -1 )
+ putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' );
+ else if( targetmap[i] != -1 )
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+#if 0
+ if( targetmap[i] != -1 )
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+
+ else if( targetmap[j] != -1 )
+ putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' );
+#endif
+ else
+ {
+ reporterr( "okashii\n" );
+ exit( 1 );
+ }
+ }
}
+ pscore = score2dist( pscore, selfscore[i], selfscore[j] );
+
+// pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 );
+// pscore = score2dist( pscore, selfscore[i], selfscore[j] );
+// reporterr( "->pscore = %f\n", pscore );
- if( (bunbo=MIN( selfscore[i], selfscore[j] )) == 0.0 || bunbo < pscore )
- pscore = 2.0;
- else
- pscore = ( 1.0 - pscore / bunbo ) * 2.0;
}
else
{
pthread_mutex_unlock( targ->mutex_stdout );
}
#endif // mutex
- if( store_dist) distancemtx[i][j] = pscore;
+ if( store_dist )
+ {
+ if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = pscore;
+ else distancemtx[i][j-i] = pscore;
+ }
}
}
#endif
-static void pairalign( char **name, int nlen[M], char **seq, char **aseq, char **mseq1, char **mseq2, double *effarr, int alloclen )
+static void pairalign( char **name, int *nlen, char **seq, char **aseq, char **dseq, int *thereisxineachseq, char **mseq1, char **mseq2, int alloclen, Lastresx **lastresx, double **distancemtx, LocalHom **localhomtable, int ngui )
{
- int i, j, ilim;
- int clus1, clus2;
- int off1, off2;
- float pscore = 0.0; // by D.Mathog
- static char *indication1, *indication2;
+ int i, j, ilim, jst, jj;
+ int off1, off2, dum1, dum2, thereisx;
+ double pscore = 0.0; // by D.Mathog
FILE *hat2p, *hat3p;
- double **distancemtx;
+// double **distancemtx;
double *selfscore;
double *effarr1;
double *effarr2;
char *pt;
char *hat2file = "hat2";
- LocalHom **localhomtable = NULL, *tmpptr;
- static char **pair;
+// LocalHom **localhomtable = NULL,
+ LocalHom *tmpptr;
int intdum;
- double bunbo;
char ***bpp = NULL; // mxscarna no toki dake
+ char **distseq1, **distseq2;
+ char **dumseq1, **dumseq2;
+ double dist;
+ double scoreoffset;
+ int ntarget;
+ int *targetmap, *targetmapr;
+
+
+ if( specifictarget )
+ {
+ targetmap = calloc( njob, sizeof( int ) );
+ ntarget = 0;
+ for( i=0; i<njob; i++ )
+ {
+ targetmap[i] = -1;
+ if( !strncmp( name[i]+1, "_focus_", 7 ) )
+ targetmap[i] = ntarget++;
+ }
+ targetmapr = calloc( ntarget, sizeof( int ) );
+ for( i=0; i<njob; i++ )
+ if( targetmap[i] != -1 ) targetmapr[targetmap[i]] = i;
+
+ if( ntarget == 0 )
+ {
+ reporterr( "\n\nAdd '>_focus_' to the title lines of the sequences to be focused on.\n\n" );
+ exit( 1 );
+ }
+ else
+ {
+ reporterr( "nfocus = %d \n", ntarget );
+ }
+ }
+ else
+ {
+ ntarget = njob;
+ targetmap = calloc( njob, sizeof( int ) );
+ targetmapr = calloc( njob, sizeof( int ) );
+ for( i=0; i<njob; i++ )
+ targetmap[i] = targetmapr[i] = i;
+ }
+
+#if 0
+ for( i=0; i<njob; i++ )
+ reporterr( "targetmap[%d] = %d\n", i, targetmap[i] );
+ for( i=0; i<ntarget; i++ )
+ reporterr( "targetmapr[%d] = %d\n", i, targetmapr[i] );
+#endif
- if( store_localhom )
+ if( store_localhom && localhomtable == NULL )
{
- localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
- for( i=0; i<njob; i++)
+ if( alg == 'Y' || alg == 'r' )
+ {
+ ilim = njob - nadd;
+ jst = nadd;
+ }
+ else
{
- localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
- for( j=0; j<njob; j++)
+ ilim = ntarget;
+ jst = njob;
+ }
+ localhomtable = (LocalHom **)calloc( ilim, sizeof( LocalHom *) );
+ for( i=0; i<ilim; i++)
+ {
+ localhomtable[i] = (LocalHom *)calloc( jst, sizeof( LocalHom ) );
+ for( j=0; j<jst; j++)
{
localhomtable[i][j].start1 = -1;
localhomtable[i][j].end1 = -1;
localhomtable[i][j].opt = -1.0;
localhomtable[i][j].next = NULL;
localhomtable[i][j].nokori = 0;
+ localhomtable[i][j].extended = -1;
+ localhomtable[i][j].last = localhomtable[i]+j;
+ localhomtable[i][j].korh = 'h';
}
+ if( !specifictarget && alg != 'Y' && alg != 'r' ) jst--;
}
}
- if( store_dist ) distancemtx = AllocateDoubleMtx( njob, njob );
+ if( store_dist )
+ {
+ if( ngui == 0 )
+ {
+ if( alg == 'Y' || alg == 'r' )
+ distancemtx = AllocateDoubleMtx( njob, nadd );
+ else
+ distancemtx = AllocateDoubleHalfMtx( njob );
+// distancemtx = AllocateDoubleMtx( njob, njob );
+ }
+ }
else distancemtx = NULL;
+
+ if( alg == 'N' )
+ {
+ dumseq1 = AllocateCharMtx( 1, alloclen+10 );
+ dumseq2 = AllocateCharMtx( 1, alloclen+10 );
+ }
+ distseq1 = AllocateCharMtx( 1, 0 ); // muda
+ distseq2 = AllocateCharMtx( 1, 0 ); // muda
+
selfscore = AllocateDoubleVec( njob );
effarr1 = AllocateDoubleVec( njob );
effarr2 = AllocateDoubleVec( njob );
- indication1 = AllocateCharVec( 150 );
- indication2 = AllocateCharVec( 150 );
-#if 0
-#else
- pair = AllocateCharMtx( njob, njob );
-#endif
#if 0
fprintf( stderr, "##### fftwinsize = %d, fftthreshold = %d\n", fftWinSize, fftThreshold );
// writePre( njob, name, nlen, aseq, 0 );
- for( i=0; i<njob; i++ ) for( j=0; j<njob; j++ ) pair[i][j] = 0;
- for( i=0; i<njob; i++ ) pair[i][i] = 1;
+ reporterr( "All-to-all alignment.\n" );
+ if( alg == 'R' )
+ {
+ fprintf( stderr, "Calling last (http://last.cbrc.jp/)\n" );
+ if( lastonce )
+ calllast_once( njob, seq, njob, seq, lastresx );
+ else
+ calllast_fast( njob, seq, njob, seq, lastresx );
+ fprintf( stderr, "done.\n" );
+// nthread = 0; // igo multithread nashi
+ }
+ if( alg == 'r' )
+ {
+ fprintf( stderr, "Calling last (http://last.cbrc.jp/)\n" );
+ fprintf( stderr, "nadd=%d\n", nadd );
+#if 1 // last_fast ha, lastdb ga muda
+ if( lastonce )
+ calllast_once( njob-nadd, seq, nadd, seq+njob-nadd, lastresx );
+ else
+ calllast_fast( njob-nadd, seq, nadd, seq+njob-nadd, lastresx );
+#else
+ calllast_once( njob-nadd, seq, nadd, seq+njob-nadd, lastresx );
+#endif
+
+ fprintf( stderr, "nadd=%d\n", nadd );
+ fprintf( stderr, "done.\n" );
+// nthread = 0; // igo multithread nashi
+ }
if( alg == 'H' )
{
fprintf( stderr, "done.\n" );
fprintf( stderr, "Running MXSCARNA (Tabei et al. http://www.ncrna.org/software/mxscarna)\n" );
}
+ if( alg == 'G' )
+ {
+ fprintf( stderr, "Preparing bpp\n" );
+// bpp = AllocateCharCub( njob, nlenmax, 0 );
+ bpp = calloc( njob, sizeof( char ** ) );
+ preparebpp( njob, bpp );
+ fprintf( stderr, "done.\n" );
+ fprintf( stderr, "Running DAFS (Sato et al. http://www.ncrna.org/)\n" );
+ }
for( i=0; i<njob; i++ )
{
pscore = 0.0;
for( pt=seq[i]; *pt; pt++ )
- pscore += amino_dis[(int)*pt][(int)*pt];
+ pscore += amino_dis[(unsigned char)*pt][(unsigned char)*pt];
selfscore[i] = pscore;
-
+// fprintf( stderr, "selfscore[%d] = %f\n", i, selfscore[i] );
}
#if enablemultithread
- if( nthread > 0 )
+ if( nthread > 0 ) // alg=='r' || alg=='R' -> nthread:=0 (sukoshi ue)
{
Jobtable jobpos;
pthread_t *handle;
pthread_mutex_t mutex_stdout;
thread_arg_t *targ;
+ if( alg == 'Y' || alg == 'r' ) jobpos.j = njob - nadd - 1;
+ else jobpos.j = 0;
jobpos.i = 0;
- jobpos.j = 0;
targ = calloc( nthread, sizeof( thread_arg_t ) );
handle = calloc( nthread, sizeof( pthread_t ) );
targ[i].jobpospt = &jobpos;
targ[i].name = name;
targ[i].seq = seq;
+ targ[i].dseq = dseq;
+ targ[i].thereisxineachseq = thereisxineachseq;
targ[i].localhomtable = localhomtable;
targ[i].distancemtx = distancemtx;
targ[i].selfscore = selfscore;
targ[i].bpp = bpp;
+ targ[i].lastresx = lastresx;
targ[i].alloclen = alloclen;
+ targ[i].targetmap = targetmap;
targ[i].mutex_counter = &mutex_counter;
targ[i].mutex_stdout = &mutex_stdout;
else
#endif
{
- ilim = njob - 1;
+ double **dynamicmtx = NULL;
+ if( specificityconsideration > 0.0 ) dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
+
+ if( alg == 'Y' || alg == 'r' ) ilim = njob - nadd;
+ else ilim = njob - 1;
for( i=0; i<ilim; i++ )
{
if( stdout_dist) fprintf( stdout, "%d %d d=%.3f\n", i+1, i+1, 0.0 );
- fprintf( stderr, "% 5d / %d\r", i, njob );
+ fprintf( stderr, "% 5d / %d\r", i, njob-nadd );
fflush( stderr );
- for( j=i+1; j<njob; j++ )
+
+ if( alg == 'Y' || alg == 'r' ) jst = njob - nadd;
+ else jst = i + 1;
+ for( j=jst; j<njob; j++ )
{
if( strlen( seq[i] ) == 0 || strlen( seq[j] ) == 0 )
{
- if( store_dist ) distancemtx[i][j] = 2.0;
- if( stdout_dist) fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, 2.0 );
+ if( store_dist )
+ {
+ if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = 3.0;
+ else distancemtx[i][j-i] = 3.0;
+ }
+ if( stdout_dist) fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, 3.0 );
continue;
}
- strcpy( aseq[i], seq[i] );
- strcpy( aseq[j], seq[j] );
- clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
- clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
+ strcpy( aseq[0], seq[i] );
+ strcpy( aseq[1], seq[j] );
+// clus1 = conjuctionfortbfast( pair, i, aseq, mseq1, effarr1, effarr, indication1 );
+// clus2 = conjuctionfortbfast( pair, j, aseq, mseq2, effarr2, effarr, indication2 );
+// fprintf( stderr, "Skipping conjuction..\n" );
+
+ effarr1[0] = 1.0;
+ effarr2[0] = 1.0;
+ mseq1[0] = aseq[0];
+ mseq2[0] = aseq[1];
+
+ thereisx = thereisxineachseq[i] + thereisxineachseq[j];
+// strcpy( distseq1[0], dseq[i] ); // nen no tame
+// strcpy( distseq2[0], dseq[j] ); // nen no tame
+ distseq1[0] = dseq[i];
+ distseq2[0] = dseq[j];
+
// fprintf( stderr, "mseq1 = %s\n", mseq1[0] );
// fprintf( stderr, "mseq2 = %s\n", mseq2[0] );
if( use_fft )
{
- pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum, NULL, 0, NULL );
+ pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, 1, 1, alloclen, &intdum, NULL, 0, NULL );
// fprintf( stderr, "pscore (fft) = %f\n", pscore );
off1 = off2 = 0;
}
{
switch( alg )
{
- case( 'a' ):
- pscore = Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
- off1 = off2 = 0;
- break;
case( 't' ):
- pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen );
+// pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen );
+ pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // tsuneni distseq shiyou
off1 = off2 = 0;
break;
case( 'A' ):
- pscore = G__align11( mseq1, mseq2, alloclen, outgap, outgap );
+ if( usenaivescoreinsteadofalignmentscore )
+ {
+ G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap );
+ pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki
+ }
+ else
+ {
+// if( store_localhom )
+ if( store_localhom && ( targetmap[i] != -1 || targetmap[j] != -1 ) )
+ {
+ pscore = G__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, outgap, outgap );
+ if( thereisx ) pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki
+#if 1
+ if( specificityconsideration > 0.0 )
+ {
+ dist = score2dist( pscore, selfscore[i], selfscore[j] );
+// dist = score2dist( L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ), selfscore[i], selfscore[j] ); // 2014/Feb/20
+ if( dist2offset( dist ) < 0.0 )
+ {
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru.
+ strcpy( mseq1[0], seq[i] );
+ strcpy( mseq2[0], seq[j] );
+ G__align11( dynamicmtx, mseq1, mseq2, alloclen, outgap, outgap );
+ }
+// pscore = (double)naivepairscore11( *mseq1, *mseq2, 0.0 );
+ }
+#endif
+ }
+ else
+ pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // uwagaki
+ }
off1 = off2 = 0;
break;
case( 'N' ):
- pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen );
- genL__align11( mseq1, mseq2, alloclen, &off1, &off2 );
+// pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, mseq1, mseq2, alloclen );
+ if( usenaivescoreinsteadofalignmentscore )
+ {
+ genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 );
+ pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki
+ }
+ else
+ {
+ pscore = genL__align11( n_dis_consweight_multi, mseq1, mseq2, alloclen, &off1, &off2 );
+ if( thereisx )
+ {
+ strcpy( dumseq1[0], distseq1[0] );
+ strcpy( dumseq2[0], distseq2[0] );
+ pscore = genL__align11( n_dis_consweight_multi, dumseq1, dumseq2, alloclen, &dum1, &dum2 ); // uwagaki
+ }
+#if 1
+ if( specificityconsideration > 0.0 )
+ {
+// fprintf( stderr, "dist = %f\n", score2dist( pscore, selfscore[i], selfscore[j] ) );
+ dist = score2dist( pscore, selfscore[i], selfscore[j] );
+ if( dist2offset( dist ) < 0.0 )
+ {
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru.
+ strcpy( mseq1[0], seq[i] );
+ strcpy( mseq2[0], seq[j] );
+ genL__align11( dynamicmtx, mseq1, mseq2, alloclen, &off1, &off2 );
+ }
+ }
+#endif
+ }
+ break;
+ case( 'R' ):
+ if( nadd && njob-nadd <= j && njob-nadd <= i ) // new sequence doushi ha mushi
+ pscore = 0.0;
+ else
+ pscore = (double)lastresx[i][j].score; // all pair
+ break;
+ case( 'r' ):
+ if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) )
+ pscore = (double)lastresx[i][j-(njob-nadd)].score;
+ else
+ pscore = 0.0;
+ break;
+ case( 'L' ):
+ if( nadd && njob-nadd <= j && njob-nadd <= i ) // new sequence doushi ha mushi
+ pscore = 0.0;
+ else
+ {
+ if( usenaivescoreinsteadofalignmentscore )
+ {
+ L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 );
+ pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki
+ }
+ else
+ {
+// if( store_localhom )
+ if( store_localhom && ( targetmap[i] != -1 || targetmap[j] != -1 ) )
+ {
+ pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 ); // all pair
+ if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // all pair
+#if 1
+ if( specificityconsideration > 0.0 )
+ {
+ dist = score2dist( pscore, selfscore[i], selfscore[j] );
+ if( ( scoreoffset = dist2offset( dist ) ) < 0.0 )
+ {
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, 0.5 * dist ); // upgma ni awaseru.
+ strcpy( mseq1[0], seq[i] );
+ strcpy( mseq2[0], seq[j] );
+ L__align11( dynamicmtx, scoreoffset, mseq1, mseq2, alloclen, &off1, &off2 );
+ }
+ }
+#endif
+ }
+ else
+ pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // all pair
+ }
+ }
+// pscore = G__align11_noalign( n_dis_consweight_multi, penalty, penalty_ex, distseq1, distseq2, alloclen ); // CHUUI!!!!!!
break;
+ case( 'Y' ):
+ if( nadd == 0 || ( i < njob-nadd && njob-nadd <= j ) ) // new sequence vs exiting sequence nomi keisan
+ {
+ if( usenaivescoreinsteadofalignmentscore )
+ {
+ L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 );
+ pscore = (double)naivepairscore11( mseq1[0], mseq2[0], 0.0 ); // uwagaki
+ }
+ else
+ {
+ if( store_localhom )
+ {
+ pscore = L__align11( n_dis_consweight_multi, 0.0, mseq1, mseq2, alloclen, &off1, &off2 );
+ if( thereisx ) pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 ); // uwagaki
+ }
+ else
+ pscore = L__align11_noalign( n_dis_consweight_multi, distseq1, distseq2 );
+ }
+ }
+ else
+ pscore = 0.0;
+ break;
+ case( 'a' ):
+ pscore = Aalign( mseq1, mseq2, effarr1, effarr2, 1, 1, alloclen );
+ off1 = off2 = 0;
+ break;
+#if 0
case( 'K' ):
pscore = genG__align11( mseq1, mseq2, alloclen );
off1 = off2 = 0;
break;
- case( 'L' ):
- pscore = G__align11_noalign( amino_dis, penalty, penalty_ex, mseq1, mseq2, alloclen );
- L__align11( mseq1, mseq2, alloclen, &off1, &off2 );
- break;
+#endif
case( 'H' ):
pscore = recallpairfoldalign( mseq1, mseq2, i, j, &off1, &off2, alloclen );
break;
pscore = callmxscarna_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j );
off1 = off2 = 0;
break;
+ case( 'G' ):
+ pscore = calldafs_giving_bpp( mseq1, mseq2, bpp[i], bpp[j], alloclen, i, j );
+ off1 = off2 = 0;
+ break;
case( 'M' ):
pscore = MSalign11( mseq1, mseq2, alloclen );
break;
#if SCOREOUT
fprintf( stderr, "score = %10.2f (%d,%d)\n", pscore, i, j );
#endif
- if( !store_localhom )
- ;
- else if( alg == 'H' )
- putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
- else if( alg != 'S' && alg != 'V' )
- putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ) );
-
-
- if( (bunbo=MIN( selfscore[i], selfscore[j] )) == 0.0 || bunbo < pscore )
- pscore = 2.0;
- else
- pscore = ( 1.0 - pscore / bunbo ) * 2.0;
+// if( pscore > 0.0 && ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) ) // x-ins-i de seido teika
+ if( ( nadd == 0 || ( alg != 'Y' && alg != 'r' ) || ( i < njob-nadd && njob-nadd <= j ) ) )
+ {
+ if( !store_localhom )
+ ;
+ else if( specifictarget && targetmap[i] == -1 && targetmap[j] == -1)
+ ;
+ else if( alg == 'R' )
+ putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j, lastresx[i]+j, 'h' );
+ else if( alg == 'r' )
+ putlocalhom_last( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), lastresx[i]+j-(njob-nadd), 'h' );// ?????
+ else if( alg == 'H' )
+ putlocalhom_ext( mseq1[0], mseq2[0], localhomtable[i]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+ else if( alg == 'Y' )
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-(njob-nadd), off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+ else if( !specifictarget && alg != 'S' && alg != 'V' )
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[i]+j-i, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+ else
+ {
+ if( targetmap[i] != -1 && targetmap[j] != -1 )
+ {
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+ putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' ); // sukoshi muda.
+ }
+ else if( targetmap[j] != -1 )
+ putlocalhom2( mseq2[0], mseq1[0], localhomtable[targetmap[j]]+i, off2, off1, (int)pscore, strlen( mseq2[0] ), 'h' );
+ else if( targetmap[i] != -1 )
+ putlocalhom2( mseq1[0], mseq2[0], localhomtable[targetmap[i]]+j, off1, off2, (int)pscore, strlen( mseq1[0] ), 'h' );
+ else
+ {
+ reporterr( "okashii\n" );
+ exit( 1 );
+ }
+ }
+ }
+
+ pscore = score2dist( pscore, selfscore[i], selfscore[j] );
}
else
{
}
}
if( stdout_dist ) fprintf( stdout, "%d %d d=%.3f\n", i+1, j+1, pscore );
- if( store_dist) distancemtx[i][j] = pscore;
+ if( store_dist)
+ {
+ if( alg == 'Y' || alg == 'r' ) distancemtx[i][j-(njob-nadd)] = pscore;
+ else distancemtx[i][j-i] = pscore;
+ }
}
}
+ if( dynamicmtx ) FreeDoubleMtx( dynamicmtx );
}
- if( store_dist )
+ if( store_dist && ngui == 0 )
{
hat2p = fopen( hat2file, "w" );
if( !hat2p ) ErrorExit( "Cannot open hat2." );
- WriteHat2_pointer( hat2p, njob, name, distancemtx );
+ if( alg == 'Y' || alg == 'r' )
+ WriteHat2_part_pointer( hat2p, njob, nadd, name, distancemtx );
+ else
+// WriteHat2_pointer( hat2p, njob, name, distancemtx );
+ WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, distancemtx ); // jissiha double
fclose( hat2p );
}
hat3p = fopen( "hat3", "w" );
if( !hat3p ) ErrorExit( "Cannot open hat3." );
- if( store_localhom )
+ if( store_localhom && ngui == 0 )
{
+
fprintf( stderr, "\n\n##### writing hat3\n" );
- ilim = njob-1;
+ if( alg == 'Y' || alg == 'r' )
+ ilim = njob-nadd;
+ else if( specifictarget )
+ ilim = ntarget;
+ else
+ ilim = njob-1;
for( i=0; i<ilim; i++ )
{
- for( j=i+1; j<njob; j++ )
+ if( alg == 'Y' || alg == 'r' )
+ {
+ jst = njob-nadd;
+ jj = 0;
+ }
+ else if( specifictarget )
{
- for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
+ jst = 0;
+ jj = 0;
+ }
+ else
+ {
+ jst = i;
+ jj = 0;
+ }
+ for( j=jst; j<njob; j++, jj++ )
+ {
+ for( tmpptr=localhomtable[i]+jj; tmpptr; tmpptr=tmpptr->next )
{
+// fprintf( stderr, "j=%d, jj=%d\n", j, jj );
if( tmpptr->opt == -1.0 ) continue;
// tmptmptmptmptmp
// if( alg == 'B' || alg == 'T' )
// fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d %p\n", i, j, tmpptr->overlapaa, 1.0, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, (void *)tmpptr->next );
// else
- fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 );
+ if( targetmap[j] == -1 || targetmap[i] < targetmap[j] )
+ fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", targetmapr[i], j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2 );
+// fprintf( hat3p, "%d %d %d %7.5f %d %d %d %d h\n", i, j, tmpptr->overlapaa, tmpptr->opt, tmpptr->start1, tmpptr->end1, tmpptr->start2+1, tmpptr->end2+1 ); // zettai dame!!!!
}
}
}
+// if( ngui == 0 )
+// {
#if DEBUG
- fprintf( stderr, "calling FreeLocalHomTable\n" );
+ fprintf( stderr, "calling FreeLocalHomTable\n" );
#endif
- FreeLocalHomTable( localhomtable, njob );
+ if( alg == 'Y' || alg == 'r' )
+ FreeLocalHomTable_part( localhomtable, (njob-nadd), nadd );
+ else if( specifictarget )
+ FreeLocalHomTable_part( localhomtable, ntarget, njob );
+ else
+ FreeLocalHomTable_half( localhomtable, njob );
#if DEBUG
- fprintf( stderr, "done. FreeLocalHomTable\n" );
+ fprintf( stderr, "done. FreeLocalHomTable\n" );
#endif
+// }
}
fclose( hat3p );
free( selfscore );
free( effarr1 );
free( effarr2 );
- free( indication1 );
- free( indication2 );
- if( store_dist ) FreeDoubleMtx( distancemtx );
-}
-
-static void WriteOptions( FILE *fp )
-{
-
- if( dorp == 'd' ) fprintf( fp, "DNA\n" );
- else
- {
- if ( scoremtx == 0 ) fprintf( fp, "JTT %dPAM\n", pamN );
- else if( scoremtx == 1 ) fprintf( fp, "BLOSUM %d\n", nblosum );
- else if( scoremtx == 2 ) fprintf( fp, "M-Y\n" );
- }
- fprintf( stderr, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
- if( use_fft ) fprintf( fp, "FFT on\n" );
-
- fprintf( fp, "tree-base method\n" );
- if( tbrweight == 0 ) fprintf( fp, "unweighted\n" );
- else if( tbrweight == 3 ) fprintf( fp, "clustalw-like weighting\n" );
- if( tbitr || tbweight )
+ if( alg == 'N' )
{
- fprintf( fp, "iterate at each step\n" );
- if( tbitr && tbrweight == 0 ) fprintf( fp, " unweighted\n" );
- if( tbitr && tbrweight == 3 ) fprintf( fp, " reversely weighted\n" );
- if( tbweight ) fprintf( fp, " weighted\n" );
- fprintf( fp, "\n" );
+ FreeCharMtx( dumseq1 );
+ FreeCharMtx( dumseq2 );
}
+ free( distseq1 );
+ free( distseq2 );
+ if( store_dist && ngui == 0 ) FreeDoubleHalfMtx( distancemtx, njob );
- fprintf( fp, "Gap Penalty = %+5.2f, %+5.2f, %+5.2f\n", (double)ppenalty/1000, (double)ppenalty_ex/1000, (double)poffset/1000 );
-
- if( alg == 'a' )
- fprintf( fp, "Algorithm A\n" );
- else if( alg == 'A' )
- fprintf( fp, "Algorithm A+\n" );
- else if( alg == 'S' )
- fprintf( fp, "Apgorithm S\n" );
- else
- fprintf( fp, "Unknown algorithm\n" );
-
- if( use_fft )
- {
- fprintf( fp, "FFT on\n" );
- if( dorp == 'd' )
- fprintf( fp, "Basis : 4 nucleotides\n" );
- else
- {
- if( fftscore )
- fprintf( fp, "Basis : Polarity and Volume\n" );
- else
- fprintf( fp, "Basis : 20 amino acids\n" );
- }
- fprintf( fp, "Threshold of anchors = %d%%\n", fftThreshold );
- fprintf( fp, "window size of anchors = %dsites\n", fftWinSize );
- }
- else
- fprintf( fp, "FFT off\n" );
- fflush( fp );
+ free( targetmap );
+ free( targetmapr );
}
-
-int main( int argc, char *argv[] )
+
+int pairlocalalign( int ngui, int lgui, char **namegui, char **seqgui, double **distancemtx, LocalHom **localhomtable, int argc, char **argv )
{
- int nlen[M];
+ int *nlen, *thereisxineachseq;
char **name, **seq;
char **mseq1, **mseq2;
char **aseq;
char **bseq;
- double *eff;
- int i;
+ char **dseq;
+ int i, j, k;
FILE *infp;
char c;
int alloclen;
+ Lastresx **lastresx;
+
+// reporterr( "argc=%d, argv[0]=%s\n", argc, argv[0] );
arguments( argc, argv );
-#ifndef enablemultithread
- nthread = 0;
-#endif
- if( inputfile )
+
+ if( !ngui )
{
- infp = fopen( inputfile, "r" );
- if( !infp )
+ if( inputfile )
+ {
+ infp = fopen( inputfile, "r" );
+ if( !infp )
+ {
+ fprintf( stderr, "Cannot open %s\n", inputfile );
+ exit( 1 );
+ }
+ }
+ else
+ infp = stdin;
+
+ getnumlen( infp );
+ rewind( infp );
+
+ if( njob < 2 )
+ {
+ fprintf( stderr, "At least 2 sequences should be input!\n"
+ "Only %d sequence found.\n", njob );
+ exit( 1 );
+ }
+ if( njob > M )
{
- fprintf( stderr, "Cannot open %s\n", inputfile );
+ fprintf( stderr, "The number of sequences must be < %d\n", M );
+ fprintf( stderr, "Please try the splittbfast program for such large data.\n" );
exit( 1 );
}
}
- else
- infp = stdin;
-
- getnumlen( infp );
- rewind( infp );
- if( njob < 2 )
+ if( ( alg == 'r' || alg == 'R' ) && dorp == 'p' )
{
- fprintf( stderr, "At least 2 sequences should be input!\n"
- "Only %d sequence found.\n", njob );
+ fprintf( stderr, "Not yet supported\n" );
exit( 1 );
}
- if( njob > M )
+
+ alloclen = nlenmax*2;
+ if( ngui )
+ {
+ seq = seqgui;
+ name = namegui;
+ }
+ else
{
- fprintf( stderr, "The number of sequences must be < %d\n", M );
- fprintf( stderr, "Please try the splittbfast program for such large data.\n" );
- exit( 1 );
+ seq = AllocateCharMtx( njob, alloclen+10 );
+ name = AllocateCharMtx( njob, B );
}
- alloclen = nlenmax*2;
- seq = AllocateCharMtx( njob, alloclen+10 );
- aseq = AllocateCharMtx( njob, alloclen+10 );
+ aseq = AllocateCharMtx( 2, alloclen+10 );
bseq = AllocateCharMtx( njob, alloclen+10 );
+ dseq = AllocateCharMtx( njob, alloclen+10 );
mseq1 = AllocateCharMtx( njob, 0 );
mseq2 = AllocateCharMtx( njob, 0 );
- name = AllocateCharMtx( njob, B );
+ nlen = AllocateIntVec( njob );
+ thereisxineachseq = AllocateIntVec( njob );
+
- eff = AllocateDoubleVec( njob );
+ if( alg == 'R' )
+ {
+ lastresx = calloc( njob+1, sizeof( Lastresx * ) );
+ for( i=0; i<njob; i++ )
+ {
+ lastresx[i] = calloc( njob+1, sizeof( Lastresx ) ); // muda
+ for( j=0; j<njob; j++ )
+ {
+ lastresx[i][j].score = 0;
+ lastresx[i][j].naln = 0;
+ lastresx[i][j].aln = NULL;
+ }
+ lastresx[i][njob].naln = -1;
+ }
+ lastresx[njob] = NULL;
+ }
+ else if( alg == 'r' )
+ {
+// fprintf( stderr, "Allocating lastresx (%d), njob=%d, nadd=%d\n", njob-nadd+1, njob, nadd );
+ lastresx = calloc( njob-nadd+1, sizeof( Lastresx * ) );
+ for( i=0; i<njob-nadd; i++ )
+ {
+// fprintf( stderr, "Allocating lastresx[%d]\n", i );
+ lastresx[i] = calloc( nadd+1, sizeof( Lastresx ) );
+ for( j=0; j<nadd; j++ )
+ {
+// fprintf( stderr, "Initializing lastresx[%d][%d]\n", i, j );
+ lastresx[i][j].score = 0;
+ lastresx[i][j].naln = 0;
+ lastresx[i][j].aln = NULL;
+ }
+ lastresx[i][nadd].naln = -1;
+ }
+ lastresx[njob-nadd] = NULL;
+ }
+ else
+ lastresx = NULL;
#if 0
Read( name, nlen, seq );
#else
- readData_pointer( infp, name, nlen, seq );
+ if( !ngui )
+ {
+ readData_pointer( infp, name, nlen, seq );
+ fclose( infp );
+ }
#endif
- fclose( infp );
constants( njob, seq );
initFiles();
- WriteOptions( trap_g );
+// WriteOptions( trap_g );
c = seqcheck( seq );
if( c )
// writePre( njob, name, nlen, seq, 0 );
- for( i=0; i<njob; i++ ) eff[i] = 1.0;
- for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
- pairalign( name, nlen, bseq, aseq, mseq1, mseq2, eff, alloclen );
+ for( i=0; i<njob; i++ )
+ {
+ gappick0( bseq[i], seq[i] );
+ thereisxineachseq[i] = removex( dseq[i], bseq[i] );
+ }
+
+ pairalign( name, nlen, bseq, aseq, dseq, thereisxineachseq, mseq1, mseq2, alloclen, lastresx, distancemtx, localhomtable, ngui );
fprintf( trap_g, "done.\n" );
#if DEBUG
fprintf( stderr, "closing trap_g\n" );
#endif
fclose( trap_g );
+ fclose( prep_g );
// writePre( njob, name, nlen, aseq, !contin );
#if 0
{
fprintf( stderr, "\nThe order of pairwise alignments is not identical to that in the input file, because of the parallel calculation. Reorder them by yourself.\n" );
}
- FreeCharMtx( seq );
+
+#if 1
+ if( lastresx )
+ {
+ for( i=0; lastresx[i]; i++ )
+ {
+ for( j=0; lastresx[i][j].naln!=-1; j++ )
+ {
+ for( k=0; k<lastresx[i][j].naln; k++ )
+ {
+ free( lastresx[i][j].aln[k].reg1 );
+ free( lastresx[i][j].aln[k].reg2 );
+ }
+ free( lastresx[i][j].aln );
+ }
+ free( lastresx[i] );
+ }
+ free( lastresx );
+ }
+#endif
+ if( ngui == 0 )
+ {
+ FreeCharMtx( seq );
+ FreeCharMtx( name );
+ }
FreeCharMtx( aseq );
FreeCharMtx( bseq );
- FreeCharMtx( name );
+ FreeCharMtx( dseq );
free( mseq1 );
free( mseq2 );
- free( eff );
+ free( nlen );
+ free( thereisxineachseq );
+ freeconstants();
+
+ if( !ngui )
+ {
+ FreeCommonIP();
+ }
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ G__align11( NULL, NULL, NULL, 0, 0, 0 ); // 20130603
+ G__align11_noalign( NULL, 0, 0, NULL, NULL, 0 );
+ L__align11( NULL, 0.0, NULL, NULL, 0, NULL, NULL );
+ L__align11_noalign( NULL, NULL, NULL );
+ genL__align11( NULL, NULL, NULL, 0, NULL, NULL );
+
+#if SHISHAGONYU
+ if( ngui )
+ {
+ char buf[100];
+ for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ )
+ {
+ sprintf( buf, "%5.3f", distancemtx[i][j-i] );
+ distancemtx[i][j-i] = 0.0;
+ sscanf( buf, "%lf", distancemtx[i]+j-i );
+// distancemtx[i][j-i] = 0.001 * (int)(distancemtx[i][j-i] * 1000 + 0.5);
+ }
+
+ }
+#endif
+
return( 0 );
}
+
--- /dev/null
+#include "mltaln.h"
+/*
+ * Command-line entry point: a thin wrapper that forwards argc/argv to
+ * pairlocalalign() with every GUI-side argument left empty (0 / NULL).
+ *
+ * Returns pairlocalalign()'s status, except that GUI_CANCEL is reported
+ * as 0. Per the original author's note, the treeout path uses
+ * "goto chudan" (the abort/interrupt path), so that code is not treated
+ * as a failure when running from the command line.
+ */
+int main( int argc, char **argv )
+{
+	int status;
+
+	status = pairlocalalign( 0, 0, NULL, NULL, NULL, NULL, argc, argv );
+	return ( status == GUI_CANCEL ) ? 0 : status;
+}
#define FASTMATCHCALC 1
#if 0
-static void st_OpeningGapCount( float *ogcp, int clus, char **seq, double *eff, int len )
+static void st_OpeningGapCount( double *ogcp, int clus, char **seq, double *eff, int len )
{
int i, j, gc, gb;
- float feff;
+ double feff;
for( i=0; i<len; i++ ) ogcp[i] = 0.0;
for( j=0; j<clus; j++ )
{
- feff = (float)eff[j];
+ feff = (double)eff[j];
gc = 0;
for( i=0; i<len; i++ )
{
}
}
-static void st_FinalGapCount( float *fgcp, int clus, char **seq, double *eff, int len )
+static void st_FinalGapCount( double *fgcp, int clus, char **seq, double *eff, int len )
{
int i, j, gc, gb;
- float feff;
+ double feff;
for( i=0; i<len; i++ ) fgcp[i] = 0.0;
for( j=0; j<clus; j++ )
{
- feff = (float)eff[j];
+ feff = (double)eff[j];
gc = ( seq[j][0] == '-' );
for( i=1; i<len+1; i++ )
{
static TLS int impalloclen = 0;
-static TLS float **impmtx = NULL;
-float part_imp_match_out_sc( int i1, int j1 )
+static TLS double **impmtx = NULL;
+double part_imp_match_out_sc( int i1, int j1 )
{
// fprintf( stderr, "impalloclen = %d\n", impalloclen );
// fprintf( stderr, "i1,j1=%d,%d -> impmtx=%f\n", i1, j1, impmtx[i1][j1] );
return( impmtx[i1+start1][j1+start2] );
#endif
}
-static void part_imp_match_out_vead_gapmap( float *imp, int i1, int lgth2, int start2, int *gapmap2 )
+static void part_imp_match_out_vead_gapmap( double *imp, int i1, int lgth2, int start2, int *gapmap2 )
{
#if FASTMACHCALC
- float *pt = imp;
+ double *pt = imp;
int *gapmappt = gapmap2;
while( lgth2-- )
*pt++ += impmtx[i1][start2+*gapmappt++];
#endif
}
-static void part_imp_match_out_vead_tate_gapmap( float *imp, int j1, int lgth1, int start1, int *gapmap1 )
+static void part_imp_match_out_vead_tate_gapmap( double *imp, int j1, int lgth1, int start1, int *gapmap1 )
{
#if FASTMACHCALC
- float *pt = imp;
+ double *pt = imp;
int *gapmappt = gapmap1;
while( lgth1-- )
*pt++ = impmtx[start1+*gapmappt++][j1];
#endif
}
-void part_imp_match_init_strict( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, int forscore )
+#if 1
+void part_imp_match_init_strict( double *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, double *eff1_kozo, double *eff2_kozo, LocalHom ***localhom, char *swaplist, int forscore, int *orinum1, int *orinum2 )
{
- int i, j, k1, k2, tmpint, start1, start2, end1, end2;
- double effij, effijx, effij_kozo;
- char *pt, *pt1, *pt2;
- LocalHom *tmpptr;
+// int i, j, k1, k2, tmpint, start1, start2, end1, end2;
+// double effij;
+// double effij_kozo;
+// double effijx;
+// char *pt, *pt1, *pt2;
+// static TLS char *nocount1 = NULL;
+// static TLS char *nocount2 = NULL;
+// LocalHom *tmpptr;
if( seq1 == NULL )
{
if( impmtx ) FreeFloatMtx( impmtx );
impmtx = NULL;
+// if( nocount1 ) free( nocount1 );
+// nocount1 = NULL;
+// if( nocount2 ) free( nocount2 );
+// nocount2 = NULL;
+
return;
}
- if( impalloclen <= lgth1 + 2 || impalloclen <= lgth2 + 2 )
+ if( impalloclen < lgth1 + 2 || impalloclen < lgth2 + 2 )
{
if( impmtx ) FreeFloatMtx( impmtx );
+// if( nocount1 ) free( nocount1 );
+// if( nocount2 ) free( nocount2 );
impalloclen = MAX( lgth1, lgth2 ) + 2;
- impmtx = AllocateFloatMtx( impalloclen+100, impalloclen+100 );
+ impmtx = AllocateFloatMtx( impalloclen, impalloclen );
+// nocount1 = AllocateCharVec( impalloclen );
+// nocount2 = AllocateCharVec( impalloclen );
}
-
-#if 0
- fprintf( stderr, "eff1 in _init_strict = \n" );
- for( i=0; i<clus1; i++ )
- fprintf( stderr, "eff1[] = %f\n", eff1[i] );
- for( i=0; i<clus2; i++ )
- fprintf( stderr, "eff2[] = %f\n", eff2[i] );
-#endif
-
- for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
- impmtx[i][j] = 0.0;
- effijx = 1.0 * fastathreshold;
- for( i=0; i<clus1; i++ )
- {
- for( j=0; j<clus2; j++ )
- {
- effij = eff1[i] * eff2[j] * effijx;
- effij_kozo = eff1_kozo[i] * eff2_kozo[j] * effijx;
- tmpptr = localhom[i][j];
- while( tmpptr )
- {
-// fprintf( stderr, "start1 = %d\n", tmpptr->start1 );
-// fprintf( stderr, "end1 = %d\n", tmpptr->end1 );
-// fprintf( stderr, "i = %d, seq1 = \n%s\n", i, seq1[i] );
-// fprintf( stderr, "j = %d, seq2 = \n%s\n", j, seq2[j] );
- pt = seq1[i];
- tmpint = -1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == tmpptr->start1 ) break;
- }
- start1 = (int)( pt - seq1[i] ) - 1;
-
- if( tmpptr->start1 == tmpptr->end1 ) end1 = start1;
- else
- {
-#if MACHIGAI
- while( *pt != 0 )
- {
- if( tmpint == tmpptr->end1 ) break;
- if( *pt++ != '-' ) tmpint++;
- }
- end1 = (int)( pt - seq1[i] ) - 1;
-#else
- while( *pt != 0 )
- {
-// fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, tmpptr->end1, pt-seq1[i] );
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == tmpptr->end1 ) break;
- }
- end1 = (int)( pt - seq1[i] ) - 1;
-#endif
- }
-
- pt = seq2[j];
- tmpint = -1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == tmpptr->start2 ) break;
- }
- start2 = (int)( pt - seq2[j] ) - 1;
- if( tmpptr->start2 == tmpptr->end2 ) end2 = start2;
- else
- {
-#if MACHIGAI
- while( *pt != 0 )
- {
- if( tmpint == tmpptr->end2 ) break;
- if( *pt++ != '-' ) tmpint++;
- }
- end2 = (int)( pt - seq2[j] ) - 1;
+ fillimp( impmtx, imp, clus1, clus2, lgth1, lgth2, seq1, seq2, eff1, eff2, eff1_kozo, eff2_kozo, localhom, swaplist, forscore, orinum1, orinum2 );
+}
#else
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == tmpptr->end2 ) break;
- }
- end2 = (int)( pt - seq2[j] ) - 1;
-#endif
- }
-// fprintf( stderr, "start1 = %d (%c), end1 = %d (%c), start2 = %d (%c), end2 = %d (%c)\n", start1, seq1[i][start1], end1, seq1[i][end1], start2, seq2[j][start2], end2, seq2[j][end2] );
-// fprintf( stderr, "step 0\n" );
- if( end1 - start1 != end2 - start2 )
- {
-// fprintf( stderr, "CHUUI!!, start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
- }
-
- k1 = start1; k2 = start2;
- pt1 = seq1[i] + k1;
- pt2 = seq2[j] + k2;
- while( *pt1 && *pt2 )
- {
- if( *pt1 != '-' && *pt2 != '-' )
- {
-// ½Å¤ß¤òÆó½Å¤Ë¤«¤±¤Ê¤¤¤è¤¦¤ËÃí°Õ¤·¤Æ²¼¤µ¤¤¡£
-// impmtx[k1][k2] += tmpptr->wimportance * fastathreshold;
-// impmtx[k1][k2] += tmpptr->importance * effij;
-// impmtx[k1][k2] += tmpptr->fimportance * effij;
- if( tmpptr->korh == 'k' )
- impmtx[k1][k2] += tmpptr->fimportance * effij_kozo;
- else
- impmtx[k1][k2] += tmpptr->fimportance * effij;
-// fprintf( stderr, "k1=%d, k2=%d, impalloclen=%d\n", k1, k2, impalloclen );
-// fprintf( stderr, "mark, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
- k1++; k2++;
- pt1++; pt2++;
- }
- else if( *pt1 != '-' && *pt2 == '-' )
- {
-// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
- k2++; pt2++;
- }
- else if( *pt1 == '-' && *pt2 != '-' )
- {
-// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
- k1++; pt1++;
- }
- else if( *pt1 == '-' && *pt2 == '-' )
- {
-// fprintf( stderr, "skip, %d (%c) - %d (%c) \n", k1, *pt1, k2, *pt2 );
- k1++; pt1++;
- k2++; pt2++;
- }
- if( k1 > end1 || k2 > end2 ) break;
- }
- tmpptr = tmpptr->next;
- }
- }
- }
-#if 0
- fprintf( stderr, "impmtx = \n" );
- for( k2=0; k2<lgth2; k2++ )
- fprintf( stderr, "%6.3f ", (double)k2 );
- fprintf( stderr, "\n" );
- for( k1=0; k1<lgth1; k1++ )
- {
- fprintf( stderr, "%d", k1 );
- for( k2=0; k2<lgth2; k2++ )
- fprintf( stderr, "%2.1f ", impmtx[k1][k2] );
- fprintf( stderr, "\n" );
- }
- exit( 1 );
#endif
-}
void part_imp_rna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***grouprna1, RNApair ***grouprna2, int *gapmap1, int *gapmap2, RNApair *additionalpair )
}
-void part_imp_match_init( float *imp, int clus1, int clus2, int lgth1, int lgth2, char **seq1, char **seq2, double *eff1, double *eff2, LocalHom ***localhom )
-{
- int dif, i, j, k1, k2, tmpint, start1, start2, end1, end2;
- static TLS int impalloclen = 0;
- char *pt;
- static TLS char *nocount1 = NULL;
- static TLS char *nocount2 = NULL;
-
- if( impalloclen < lgth1 || impalloclen < lgth2 )
- {
- if( impmtx ) FreeFloatMtx( impmtx );
- if( nocount1 ) free( nocount1 );
- if( nocount2 ) free( nocount2 );
- impalloclen = MAX( lgth1, lgth2 ) + 2;
- impmtx = AllocateFloatMtx( impalloclen, impalloclen );
- nocount1 = AllocateCharVec( impalloclen );
- nocount2 = AllocateCharVec( impalloclen );
- impalloclen -= 2;
- }
- for( i=0; i<lgth1; i++ )
- {
- for( j=0; j<clus1; j++ )
- if( seq1[j][i] == '-' ) break;
- if( j != clus1 ) nocount1[i] = 1;
- else nocount1[i] = 0;
- }
- for( i=0; i<lgth2; i++ )
- {
- for( j=0; j<clus2; j++ )
- if( seq2[j][i] == '-' ) break;
- if( j != clus2 ) nocount2[i] = 1;
- else nocount2[i] = 0;
- }
-#if 0
-fprintf( stderr, "nocount2 =\n" );
-for( i = 0; i<impalloclen; i++ )
+static void match_calc( double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
- fprintf( stderr, "nocount2[%d] = %d (%c)\n", i, nocount2[i], seq2[0][i] );
-}
-#endif
-
- for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
- impmtx[i][j] = 0.0;
- for( i=0; i<clus1; i++ )
+#if FASTMATCHCALC
+ int j, l;
+// double scarr[26];
+ double **cpmxpd = doublework;
+ int **cpmxpdn = intwork;
+ double *matchpt, *cpmxpdpt, **cpmxpdptpt;
+ int *cpmxpdnpt, **cpmxpdnptpt;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
+ if( initialize )
{
- fprintf( stderr, "i = %d, seq1 = %s\n", i, seq1[i] );
- for( j=0; j<clus2; j++ )
+ int count = 0;
+ for( j=0; j<lgth2; j++ )
{
- fprintf( stderr, "start1 = %d\n", localhom[i][j]->start1 );
- fprintf( stderr, "end1 = %d\n", localhom[i][j]->end1 );
- fprintf( stderr, "j = %d, seq2 = %s\n", j, seq2[j] );
- pt = seq1[i];
- tmpint = -1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == localhom[i][j]->start1 ) break;
- }
- start1 = pt - seq1[i] - 1;
-
- while( *pt != 0 )
- {
-// fprintf( stderr, "tmpint = %d, end1 = %d pos = %d\n", tmpint, localhom[i][j].end1, pt-seq1[i] );
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == localhom[i][j]->end1 ) break;
- }
- end1 = pt - seq1[i] - 1;
-
- pt = seq2[j];
- tmpint = -1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == localhom[i][j]->start2 ) break;
- }
- start2 = pt - seq2[j] - 1;
- while( *pt != 0 )
- {
- if( *pt++ != '-' ) tmpint++;
- if( tmpint == localhom[i][j]->end2 ) break;
- }
- end2 = pt - seq2[j] - 1;
-// fprintf( stderr, "start1 = %d, end1 = %d, start2 = %d, end2 = %d\n", start1, end1, start2, end2 );
- k1 = start1;
- k2 = start2;
- fprintf( stderr, "step 0\n" );
- while( k1 <= end1 && k2 <= end2 )
- {
-#if 0
- if( !nocount1[k1] && !nocount2[k2] )
- impmtx[k1][k2] += localhom[i][j].wimportance * eff1[i] * eff2[j];
- k1++; k2++;
-#else
- if( !nocount1[k1] && !nocount2[k2] )
- impmtx[k1][k2] += localhom[i][j]->wimportance * eff1[i] * eff2[j];
- k1++; k2++;
-#endif
- }
-
- dif = ( end1 - start1 ) - ( end2 - start2 );
- fprintf( stderr, "dif = %d\n", dif );
- if( dif > 0 )
+ count = 0;
+ for( l=0; l<nalphabets; l++ )
{
- do
+ if( cpmx2[l][j] )
{
- fprintf( stderr, "dif = %d\n", dif );
- k1 = start1;
- k2 = start2 - dif;
- while( k1 <= end1 && k2 <= end2 )
- {
- if( 0 <= k2 && start2 <= k2 && !nocount1[k1] && !nocount2[k2] )
- impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j];
- k1++; k2++;
- }
+ cpmxpd[j][count] = cpmx2[l][j];
+ cpmxpdn[j][count] = l;
+ count++;
}
- while( dif-- );
}
- else
+ cpmxpdn[j][count] = -1;
+ }
+ }
+
+ {
+ for( l=0; l<nalphabets; l++ )
+ {
+ scarr[l] = 0.0;
+ for( j=0; j<nalphabets; j++ )
+ scarr[l] += n_dis_consweight_multi[j][l] * cpmx1[j][i1];
+// scarr[l] += n_dis[j][l] * cpmx1[j][i1];
+ }
+ matchpt = match;
+ cpmxpdnptpt = cpmxpdn;
+ cpmxpdptpt = cpmxpd;
+ while( lgth2-- )
+ {
+ *matchpt = 0.0;
+ cpmxpdnpt = *cpmxpdnptpt++;
+ cpmxpdpt = *cpmxpdptpt++;
+ while( *cpmxpdnpt>-1 )
+ *matchpt += scarr[*cpmxpdnpt++] * *cpmxpdpt++;
+ matchpt++;
+ }
+ }
+ free( scarr );
+#else
+ int j, k, l;
+// double scarr[26];
+ double **cpmxpd = doublework;
+ int **cpmxpdn = intwork;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
+ // simple
+ if( initialize )
+ {
+ int count = 0;
+ for( j=0; j<lgth2; j++ )
+ {
+ count = 0;
+ for( l=0; l<nalphabets; l++ )
{
- do
+ if( cpmx2[l][j] )
{
- k1 = start1 + dif;
- k2 = start2;
- while( k1 <= end1 )
- {
- if( k1 >= 0 && k1 >= start1 && !nocount1[k1] && !nocount2[k2] )
- impmtx[k1][k2] = localhom[i][j]->wimportance * eff1[i] * eff2[j];
- k1++; k2++;
- }
+ cpmxpd[count][j] = cpmx2[l][j];
+ cpmxpdn[count][j] = l;
+ count++;
}
- while( dif++ );
}
+ cpmxpdn[count][j] = -1;
}
}
-#if 0
- fprintf( stderr, "impmtx = \n" );
- for( k2=0; k2<lgth2; k2++ )
- fprintf( stderr, "%6.3f ", (double)k2 );
- fprintf( stderr, "\n" );
- for( k1=0; k1<lgth1; k1++ )
+ for( l=0; l<nalphabets; l++ )
{
- fprintf( stderr, "%d", k1 );
- for( k2=0; k2<lgth2; k2++ )
- fprintf( stderr, "%6.3f ", impmtx[k1][k2] );
- fprintf( stderr, "\n" );
+ scarr[l] = 0.0;
+ for( k=0; k<nalphabets; k++ )
+ scarr[l] += n_dis_consweight_multi[k][l] * cpmx1[k][i1];
+// scarr[l] += n_dis[k][l] * cpmx1[k][i1];
}
- exit( 1 );
+ for( j=0; j<lgth2; j++ )
+ {
+ match[j] = 0.0;
+ for( k=0; cpmxpdn[k][j]>-1; k++ )
+ match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];
+ }
#endif
}
-static void match_calc( float *match, float **cpmx1, float **cpmx2, int i1, int lgth2, float **floatwork, int **intwork, int initialize )
+
+static void fillzero( double *s, int l ) // Stores 0.0 into s[0] .. s[l-1]; does nothing when l is 0.
+{
+ while( l-- ) *s++ = 0.0; // the loop stops only when l reaches 0, so l must be non-negative
+}
+
+
+/*
+ * match_calc_del
+ *
+ * Subtracts, from the per-column score vector match[0..lgth2-1], the
+ * contribution of the sequence pairs listed in mask1[]/mask2[], scored with
+ * the substitution matrix matrices[mid].
+ *
+ * For every masked pair m (i = mask1[m] in group 1, j = mask2[m] in group 2)
+ * and every column k of group 2:
+ *     match[k] -= matrices[mid][c1][c2] * eff1[i] * eff2[j]
+ * where c1 = amino_n[ seq1[i][i1] ] and c2 = amino_n[ seq2[j][k] ].
+ * A position is skipped when either residue is '-' or maps to a negative
+ * code in amino_n.
+ *
+ * Parameters
+ *   which, n1, n2 : accepted for signature compatibility; not read here.
+ *   matrices      : array of scoring matrices, indexed [mid][c1][c2].
+ *   match         : in/out score vector of length lgth2.
+ *   seq1, eff1    : group-1 sequences and their weights.
+ *   seq2, eff2    : group-2 sequences and their weights.
+ *   i1            : column of group 1 being scored.
+ *   lgth2         : number of columns of group 2.
+ *   mid           : index of the matrix to use.
+ *   nmask         : number of entries in mask1[] / mask2[].
+ *
+ * Notes
+ *   - Residues are converted to unsigned char before indexing amino_n, so a
+ *     byte >= 0x80 cannot turn into a negative array index on platforms
+ *     where plain char is signed.
+ *   - The pair loop is the outer loop so that the group-1 lookup is done
+ *     once per pair instead of once per column.  For any given k the
+ *     subtractions are still applied in increasing m, so the floating-point
+ *     result is the same as with the column loop outermost.
+ */
+static void match_calc_del( int **which, double ***matrices, double *match, int n1, char **seq1, double *eff1, int n2, char **seq2, double *eff2, int i1, int lgth2, int mid, int nmask, int *mask1, int *mask2 )
+{
+	int i, j, k, m;
+	int c1, c2;
+	double *scorerow;
+
+	for( m=0; m<nmask; m++ )
+	{
+		i = mask1[m];
+		j = mask2[m];
+
+		/* The group-1 residue does not depend on k: resolve it once. */
+		if( seq1[i][i1] == '-' ) continue;
+		c1 = amino_n[(unsigned char)seq1[i][i1]];
+		if( c1 < 0 ) continue;
+		scorerow = matrices[mid][c1];
+
+		for( k=0; k<lgth2; k++ )
+		{
+			if( seq2[j][k] == '-' ) continue;
+			c2 = amino_n[(unsigned char)seq2[j][k]];
+			if( c2 < 0 ) continue;
+			/* Same operand order as before: (score * eff1) * eff2. */
+			match[k] -= scorerow[c2] * eff1[i] * eff2[j];
+		}
+	}
+	return;
+}
+
+static void match_calc_add( double **scoreingmtx, double *match, double **cpmx1, double **cpmx2, int i1, int lgth2, double **doublework, int **intwork, int initialize )
{
#if FASTMATCHCALC
+// fprintf( stderr, "\nmatch_calc... %d", i1 );
int j, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+// double scarr[26];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
- float *matchpt, *cpmxpdpt, **cpmxpdptpt;
+ double *matchpt, *cpmxpdpt, **cpmxpdptpt;
int *cpmxpdnpt, **cpmxpdnptpt;
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
if( initialize )
{
int count = 0;
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[l][j] )
{
}
{
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( j=0; j<26; j++ )
- scarr[l] += n_dis_consweight_multi[j][l] * cpmx1[j][i1];
+ for( j=0; j<nalphabets; j++ )
// scarr[l] += n_dis[j][l] * cpmx1[j][i1];
+// scarr[l] += n_dis_consweight_multi[j][l] * cpmx1[j][i1];
+ scarr[l] += scoreingmtx[j][l] * cpmx1[j][i1];
}
matchpt = match;
cpmxpdnptpt = cpmxpdn;
cpmxpdptpt = cpmxpd;
while( lgth2-- )
{
- *matchpt = 0.0;
+// *matchpt = 0.0;
cpmxpdnpt = *cpmxpdnptpt++;
cpmxpdpt = *cpmxpdptpt++;
while( *cpmxpdnpt>-1 )
matchpt++;
}
}
+ free( scarr );
+// fprintf( stderr, "done\n" );
#else
int j, k, l;
- float scarr[26];
- float **cpmxpd = floatwork;
+// double scarr[26];
+ double **cpmxpd = doublework;
int **cpmxpdn = intwork;
- // simple
+ double *scarr;
+ scarr = calloc( nalphabets, sizeof( double ) );
+// simple
if( initialize )
{
int count = 0;
for( j=0; j<lgth2; j++ )
{
count = 0;
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
if( cpmx2[l][j] )
{
cpmxpdn[count][j] = -1;
}
}
- for( l=0; l<26; l++ )
+ for( l=0; l<nalphabets; l++ )
{
scarr[l] = 0.0;
- for( k=0; k<26; k++ )
- scarr[l] += n_dis_consweight_multi[k][l] * cpmx1[k][i1];
+ for( k=0; k<nalphabets; k++ )
// scarr[l] += n_dis[k][l] * cpmx1[k][i1];
+// scarr[l] += n_dis_consweight_multi[k][l] * cpmx1[k][i1];
+ scarr[l] += scoreingmtx[k][l] * cpmx1[k][i1];
}
for( j=0; j<lgth2; j++ )
{
for( k=0; cpmxpdn[k][j]>-1; k++ )
match[j] += scarr[cpmxpdn[k][j]] * cpmxpd[k][j];
}
+ free( scarr );
#endif
}
-static void Atracking_localhom( float *impwmpt, float *lasthorizontalw, float *lastverticalw,
+static void Atracking_localhom( double *impwmpt, double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
int **ijp, int icyc, int jcyc,
int start1, int end1, int start2, int end2,
- int *gapmap1, int *gapmap2 )
+ int *gapmap1, int *gapmap2,
+ int *warpis, int *warpjs, int warpbase )
{
- int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k;
+ int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, limk;
// char gap[] = "-";
char *gap;
- float wm;
+ double wm;
gap = newgapstr;
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
}
iin = lgth1; jin = lgth2;
*impwmpt = 0.0;
- for( k=0; k<=lgth1+lgth2; k++ )
+ limk = lgth1+lgth2+1;
+ for( k=0; k<limk; k++ )
{
- if( ijp[iin][jin] < 0 )
+
+ if( ijp[iin][jin] >= warpbase )
+ {
+ ifi = warpis[ijp[iin][jin]-warpbase];
+ jfi = warpjs[ijp[iin][jin]-warpbase];
+ }
+ else if( ijp[iin][jin] < 0 )
{
ifi = iin-1; jfi = jin+ijp[iin][jin];
}
{
ifi = iin-1; jfi = jin-1;
}
- l = iin - ifi;
- while( --l )
+ if( ifi == -warpbase && jfi == -warpbase )
{
- for( i=0; i<icyc; i++ )
- *--mseq1[i] = seq1[i][ifi+l];
- for( j=0; j<jcyc; j++ )
- *--mseq2[j] = *gap;
- k++;
+ l = iin;
+ while( --l >= 0 )
+ {
+ for( i=0; i<icyc; i++ )
+ *--mseq1[i] = seq1[i][l];
+ for( j=0; j<jcyc; j++ )
+ *--mseq2[j] = *gap;
+ k++;
+ }
+ l= jin;
+ while( --l >= 0 )
+ {
+ for( i=0; i<icyc; i++ )
+ *--mseq1[i] = *gap;
+ for( j=0; j<jcyc; j++ )
+ *--mseq2[j] = seq2[j][l];
+ k++;
+ }
+ break;
}
- l= jin - jfi;
- while( --l )
+ else
{
- for( i=0; i<icyc; i++ )
- *--mseq1[i] = *gap;
- for( j=0; j<jcyc; j++ )
- *--mseq2[j] = seq2[j][jfi+l];
- k++;
+ l = iin - ifi;
+ while( --l )
+ {
+ for( i=0; i<icyc; i++ )
+ *--mseq1[i] = seq1[i][ifi+l];
+ for( j=0; j<jcyc; j++ )
+ *--mseq2[j] = *gap;
+ k++;
+ }
+ l= jin - jfi;
+ while( --l )
+ {
+ for( i=0; i<icyc; i++ )
+ *--mseq1[i] = *gap;
+ for( j=0; j<jcyc; j++ )
+ *--mseq2[j] = seq2[j][jfi+l];
+ k++;
+ }
}
if( iin != lgth1 && jin != lgth2 ) // ??
{
- *impwmpt += part_imp_match_out_sc( gapmap1[iin]+start1, gapmap2[jin]+start2 );
+ *impwmpt += (double)part_imp_match_out_sc( gapmap1[iin]+start1, gapmap2[jin]+start2 );
// fprintf( stderr, "impwm = %f (iin=%d, jin=%d) seq1=%c, seq2=%c\n", *impwmpt, iin, jin, seq1[0][iin], seq2[0][jin] );
}
if( iin <= 0 || jin <= 0 ) break;
- for( i=0; i<icyc; i++ )
+ for( i=0; i<icyc; i++ )
*--mseq1[i] = seq1[i][ifi];
- for( j=0; j<jcyc; j++ )
+ for( j=0; j<jcyc; j++ )
*--mseq2[j] = seq2[j][jfi];
k++;
iin = ifi; jin = jfi;
}
}
-static float Atracking( float *lasthorizontalw, float *lastverticalw,
+
+static double Atracking( double *lasthorizontalw, double *lastverticalw,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
- int **ijp, int icyc, int jcyc )
+ int **ijp, int icyc, int jcyc,
+ int *warpis, int *warpjs, int warpbase )
{
- int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, lastk;
+ int i, j, l, iin, jin, ifi, jfi, lgth1, lgth2, k, lastk, limk;
// char gap[] = "-";
char *gap;
gap = newgapstr;
- float wm = 0.0;
+ double wm = 0.0;
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
}
iin = lgth1; jin = lgth2;
lastk = lgth1+lgth2;
- for( k=0; k<=lastk; k++ )
+ limk = lgth1+lgth2+1;
+ for( k=0; k<limk; k++ )
{
- if( ijp[iin][jin] < 0 )
+ if( ijp[iin][jin] >= warpbase )
+ {
+ ifi = warpis[ijp[iin][jin]-warpbase];
+ jfi = warpjs[ijp[iin][jin]-warpbase];
+ }
+ else if( ijp[iin][jin] < 0 )
{
ifi = iin-1; jfi = jin+ijp[iin][jin];
}
{
ifi = iin-1; jfi = jin-1;
}
- l = iin - ifi;
- while( --l )
+ if( ifi == -warpbase && jfi == -warpbase )
{
- for( i=0; i<icyc; i++ )
- *--mseq1[i] = seq1[i][ifi+l];
- for( j=0; j<jcyc; j++ )
- *--mseq2[j] = *gap;
- k++;
+ l = iin;
+ while( --l >= 0 )
+ {
+ for( i=0; i<icyc; i++ )
+ *--mseq1[i] = seq1[i][l];
+ for( j=0; j<jcyc; j++ )
+ *--mseq2[j] = *gap;
+ k++;
+ }
+ l= jin;
+ while( --l >= 0 )
+ {
+ for( i=0; i<icyc; i++ )
+ *--mseq1[i] = *gap;
+ for( j=0; j<jcyc; j++ )
+ *--mseq2[j] = seq2[j][l];
+ k++;
+ }
+ break;
}
- l= jin - jfi;
- while( --l )
+ else
{
- for( i=0; i<icyc; i++ )
- *--mseq1[i] = *gap;
- for( j=0; j<jcyc; j++ )
- *--mseq2[j] = seq2[j][jfi+l];
- k++;
+ l = iin - ifi;
+ while( --l )
+ {
+ for( i=0; i<icyc; i++ )
+ *--mseq1[i] = seq1[i][ifi+l];
+ for( j=0; j<jcyc; j++ )
+ *--mseq2[j] = *gap;
+ k++;
+ }
+ l= jin - jfi;
+ while( --l )
+ {
+ for( i=0; i<icyc; i++ )
+ *--mseq1[i] = *gap;
+ for( j=0; j<jcyc; j++ )
+ *--mseq2[j] = seq2[j][jfi+l];
+ k++;
+ }
}
+
if( iin <= 0 || jin <= 0 ) break;
for( i=0; i<icyc; i++ )
*--mseq1[i] = seq1[i][ifi];
return( 0.0 );
}
-float partA__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, float *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres )
+double partA__align( char **seq1, char **seq2, double *eff1, double *eff2, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
// int k;
int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
int lgth1, lgth2;
int resultlen;
- float wm = 0.0; /* int ?????? */
- float g;
- float *currentw, *previousw;
+ double wm = 0.0; /* int ?????? */
+ double g;
+ double *currentw, *previousw;
#if 1
- float *wtmp;
+ double *wtmp;
int *ijppt;
- float *mjpt, *prept, *curpt;
+ double *mjpt, *prept, *curpt;
int *mpjpt;
#endif
- static TLS float mi, *m;
+ static TLS double mi, *m;
static TLS int **ijp;
static TLS int mpi, *mp;
- static TLS float *w1, *w2;
- static TLS float *match;
- static TLS float *initverticalw; /* kufuu sureba iranai */
- static TLS float *lastverticalw; /* kufuu sureba iranai */
+ static TLS double *w1, *w2;
+ static TLS double *match;
+ static TLS double *initverticalw; /* kufuu sureba iranai */
+ static TLS double *lastverticalw; /* kufuu sureba iranai */
static TLS char **mseq1;
static TLS char **mseq2;
static TLS char **mseq;
- static TLS float *ogcp1;
- static TLS float *ogcp2;
- static TLS float *fgcp1;
- static TLS float *fgcp2;
- static TLS float **cpmx1;
- static TLS float **cpmx2;
+ static TLS double *ogcp1;
+ static TLS double *ogcp2;
+ static TLS double *fgcp1;
+ static TLS double *fgcp2;
+ static TLS double **cpmx1;
+ static TLS double **cpmx2;
+ static TLS double *gapfreq1;
+ static TLS double *gapfreq2;
static TLS int **intwork;
- static TLS float **floatwork;
+ static TLS double **doublework;
static TLS int orlgth1 = 0, orlgth2 = 0;
- float fpenalty = (float)penalty;
+ double fpenalty = (double)penalty;
+ double fpenalty_shift = (double)penalty_shift;
#if USE_PENALTY_EX
- float fpenalty_ex = (float)penalty_ex;
+ double fpenalty_ex = (double)penalty_ex;
#endif
- float *fgcp2pt;
- float *ogcp2pt;
- float fgcp1va;
- float ogcp1va;
+ double *fgcp2pt;
+ double *ogcp2pt;
+ double fgcp1va;
+ double ogcp1va;
+ double *gf2pt;
+ double *gf2ptpre;
+ double gf1va;
+ double gf1vapre;
+ double headgapfreq1;
+ double headgapfreq2;
+
+ int *warpis = NULL;
+ int *warpjs = NULL;
+ int *warpi = NULL;
+ int *warpj = NULL;
+ int *prevwarpi = NULL;
+ int *prevwarpj = NULL;
+ double *wmrecords = NULL;
+ double *prevwmrecords = NULL;
+ int warpn = 0;
+ int warpbase;
+ double curm = 0.0;
+ double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+ int *warpipt, *warpjpt;
if( seq1 == NULL )
orlgth1 = 0;
orlgth2 = 0;
- part_imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0 );
+ part_imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL );
free( mseq1 );
free( mseq2 );
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
- FreeFloatMtx( floatwork );
+ FreeFloatVec( gapfreq1 );
+ FreeFloatVec( gapfreq2 );
+
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
}
}
return( 0.0 );
}
+// fprintf( stderr, "IN partA__align\n" );
+
lgth1 = strlen( seq1[0] );
lgth2 = strlen( seq2[0] );
#if 1
- if( lgth1 == 0 ) fprintf( stderr, "WARNING: lgth1=0 in partA__align\n" );
- if( lgth2 == 0 ) fprintf( stderr, "WARNING: lgth2=0 in partA__align\n" );
+// if( lgth1 == 0 ) fprintf( stderr, "WARNING: lgth1=0 in partA__align\n" );
+// if( lgth2 == 0 ) fprintf( stderr, "WARNING: lgth2=0 in partA__align\n" );
if( lgth1 == 0 && lgth2 == 0 )
return( 0.0 );
{
j = lgth2;
seq1[i][j] = 0;
- while( j ) seq1[i][--j] = '-';
+ while( j ) seq1[i][--j] = *newgapstr;
// fprintf( stderr, "seq1[i] = %s\n", seq1[i] );
}
return( 0.0 );
{
j = lgth1;
seq2[i][j] = 0;
- while( j ) seq2[i][--j] = '-';
+ while( j ) seq2[i][--j] = *newgapstr;
// fprintf( stderr, "seq2[i] = %s\n", seq2[i] );
}
return( 0.0 );
}
#endif
+ warpbase = lgth1 + lgth2;
+ warpis = NULL;
+ warpjs = NULL;
+ warpn = 0;
+
+
+
+
+ if( trywarp )
+ {
+ if( outgap == 0 )
+ {
+ fprintf( stderr, "At present, outgap must be 1.\n" );
+ exit( 1 );
+ }
+ wmrecords = AllocateFloatVec( lgth2+1 );
+ warpi = AllocateIntVec( lgth2+1 );
+ warpj = AllocateIntVec( lgth2+1 );
+ prevwmrecords = AllocateFloatVec( lgth2+1 );
+ prevwarpi = AllocateIntVec( lgth2+1 );
+ prevwarpj = AllocateIntVec( lgth2+1 );
+ for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+ for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+ for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
+ }
+
#if 0
fprintf( stderr, "eff in SA+++align\n" );
for( i=0; i<icyc; i++ ) fprintf( stderr, "eff1[%d] = %f\n", i, eff1[i] );
FreeFloatMtx( cpmx1 );
FreeFloatMtx( cpmx2 );
- FreeFloatMtx( floatwork );
+ FreeFloatVec( gapfreq1 );
+ FreeFloatVec( gapfreq2 );
+
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
}
fgcp1 = AllocateFloatVec( ll1+2 );
fgcp2 = AllocateFloatVec( ll2+2 );
- cpmx1 = AllocateFloatMtx( 26, ll1+2 );
- cpmx2 = AllocateFloatMtx( 26, ll2+2 );
+ cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
+ cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
+
+ gapfreq1 = AllocateFloatVec( ll1+2 );
+ gapfreq2 = AllocateFloatVec( ll2+2 );
#if FASTMATCHCALC
- floatwork = AllocateFloatMtx( MAX( ll1, ll2 )+2, 26 );
- intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, 26 );
+ doublework = AllocateFloatMtx( MAX( ll1, ll2 )+2, nalphabets );
+ intwork = AllocateIntMtx( MAX( ll1, ll2 )+2, nalphabets );
#else
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
#endif
#if DEBUG
new_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 );
new_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap1 );
new_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 );
+ outgapcount( &headgapfreq1, icyc, sgap1, eff1 );
+ outgapcount( &headgapfreq2, jcyc, sgap2, eff2 );
+ outgapcount( gapfreq1+lgth1, icyc, egap1, eff1 );
+ outgapcount( gapfreq2+lgth2, jcyc, egap2, eff2 );
}
else
{
st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 );
st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 );
st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 );
+ headgapfreq1 = 0.0;
+ headgapfreq2 = 0.0;
+ gapfreq1[lgth1] = 0.0;
+ gapfreq2[lgth2] = 0.0;
+ }
+
+ if( legacygapcost == 0 )
+ {
+ gapcountf( gapfreq1, seq1, icyc, eff1, lgth1 );
+ gapcountf( gapfreq2, seq2, jcyc, eff2, lgth2 );
+ for( i=0; i<lgth1+1; i++ ) gapfreq1[i] = 1.0 - gapfreq1[i];
+ for( i=0; i<lgth2+1; i++ ) gapfreq2[i] = 1.0 - gapfreq2[i];
+ headgapfreq1 = 1.0 - headgapfreq1;
+ headgapfreq2 = 1.0 - headgapfreq2;
+ }
+ else
+ {
+ for( i=0; i<lgth1+1; i++ ) gapfreq1[i] = 1.0;
+ for( i=0; i<lgth2+1; i++ ) gapfreq2[i] = 1.0;
+ headgapfreq1 = 1.0;
+ headgapfreq2 = 1.0;
}
for( i=0; i<lgth1; i++ )
{
- ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty;
- fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty;
+ ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty * ( gapfreq1[i] );
+ fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty * ( gapfreq1[i] );
}
for( i=0; i<lgth2; i++ )
{
- ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty;
- fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty;
+ ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty * ( gapfreq2[i] );
+ fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty * ( gapfreq2[i] );
}
#if 0
for( i=0; i<lgth1; i++ )
previousw = w2;
- match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, floatwork, intwork, 1 );
+ match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, doublework, intwork, 1 );
if( localhom )
part_imp_match_out_vead_tate_gapmap( initverticalw, gapmap2[0]+start2, lgth1, start1, gapmap1 );
- match_calc( currentw, cpmx1, cpmx2, 0, lgth2, floatwork, intwork, 1 );
+ match_calc( currentw, cpmx1, cpmx2, 0, lgth2, doublework, intwork, 1 );
if( localhom )
part_imp_match_out_vead_gapmap( currentw, gapmap1[0]+start1, lgth2, start2, gapmap2 );
#if 0 // -> tbfast.c
{
for( i=1; i<lgth1+1; i++ )
{
- initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+// initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+ initverticalw[i] += ( ogcp1[0] * headgapfreq2 + fgcp1[i-1] * gapfreq2[0] ) ;
}
for( j=1; j<lgth2+1; j++ )
{
- currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+// currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+ currentw[j] += ( ogcp2[0] * headgapfreq1 + fgcp2[j-1] * gapfreq1[0] ) ;
}
}
#if OUTGAP0TRY
for( j=1; j<lgth2+1; ++j )
{
- m[j] = currentw[j-1] + ogcp1[1]; mp[j] = 0;
+// m[j] = currentw[j-1] + ogcp1[1]; mp[j] = 0;
+ m[j] = currentw[j-1] + ogcp1[1] * gapfreq2[j-1]; mp[j] = 0;;
}
lastverticalw[0] = currentw[lgth2-1];
previousw[0] = initverticalw[i-1];
- match_calc( currentw, cpmx1, cpmx2, i, lgth2, floatwork, intwork, 0 );
-#if XXXXXXX
+ match_calc( currentw, cpmx1, cpmx2, i, lgth2, doublework, intwork, 0 );
+#if 0
fprintf( stderr, "\n" );
fprintf( stderr, "i=%d\n", i );
-fprintf( stderr, "currentw = \n" );
+fprintf( stderr, "currentw before imp = \n" );
for( j=0; j<lgth2; j++ )
{
fprintf( stderr, "%5.2f ", currentw[j] );
// imp_match_out_vead( currentw, i, lgth2 );
part_imp_match_out_vead_gapmap( currentw, gapmap1[i]+start1, lgth2, start2, gapmap2 );
}
-#if XXXXXXX
-fprintf( stderr, "\n" );
+#if 0
+fprintf( stderr, "specificity = 0\n" );
fprintf( stderr, "i=%d\n", i );
fprintf( stderr, "currentw = \n" );
for( j=0; j<lgth2; j++ )
currentw[0] = initverticalw[i];
- mi = previousw[0] + ogcp2[1]; mpi = 0;
+// mi = previousw[0] + ogcp2[1]; mpi = 0;
+ mi = previousw[0] + ogcp2[1] * gapfreq1[i-1]; mpi=0;
ijppt = ijp[i] + 1;
mjpt = m + 1;
ogcp2pt = ogcp2+1;
fgcp1va = fgcp1[i-1];
ogcp1va = ogcp1[i];
+ gf1va = gapfreq1[i];
+ gf1vapre = gapfreq1[i-1];
+ gf2pt = gapfreq2+1;
+ gf2ptpre = gapfreq2;
+
+ if( trywarp )
+ {
+ prevwmrecordspt = prevwmrecords;
+ wmrecordspt = wmrecords+1;
+ wmrecords1pt = wmrecords;
+ warpipt = warpi + 1;
+ warpjpt = warpj + 1;
+ }
+
for( j=1; j<lastj; j++ )
{
#ifdef xxxenablemultithread
#if 0
fprintf( stderr, "%5.0f->", wm );
#endif
- g = mi + *fgcp2pt;
-#if 0
- fprintf( stderr, "%5.0f?", g );
-#endif
- if( g > wm )
+// g = mi + *fgcp2pt * gapfreq1[i];
+ if( (g = mi + *fgcp2pt * gf1va) > wm )
{
wm = g;
*ijppt = -( j - mpi );
}
- g = *prept + *ogcp2pt;
- if( g >= mi )
+// g = *prept + *ogcp2pt * gapfreq1[i-1];
+ if( (g = *prept + *ogcp2pt * gf1vapre) >= mi )
{
mi = g;
mpi = j-1;
mi += fpenalty_ex;
#endif
- g = *mjpt + fgcp1va;
-#if 0
- fprintf( stderr, "%5.0f?", g );
-#endif
- if( g > wm )
+// g = *mjpt + fgcp1va * gapfreq2[j];
+ if( (g = *mjpt + fgcp1va * *gf2pt) > wm )
{
wm = g;
*ijppt = +( i - *mpjpt );
}
- g = *prept + ogcp1va;
- if( g >= *mjpt )
+// g = *prept + ogcp1va * gapfreq2[j-1];
+ if( (g = *prept + ogcp1va * *gf2ptpre) >= *mjpt )
{
*mjpt = g;
*mpjpt = i-1;
#if USE_PENALTY_EX
m[j] += fpenalty_ex;
#endif
+ if( trywarp )
+ {
+#if USE_PENALTY_EX
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm ) // naka ha osokute kamawanai
+#else
+ if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm ) // naka ha osokute kamawanai
+#endif
+ {
+// fprintf( stderr, "WARP in partA__align\n" );
+ if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+ {
+ *ijppt = warpbase + warpn - 1;
+ }
+ else
+ {
+ *ijppt = warpbase + warpn;
+ warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+ warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+ warpis[warpn] = prevwarpi[j-1];
+ warpjs[warpn] = prevwarpj[j-1];
+ warpn++;
+ }
+ wm = g;
+ }
+ curm = *curpt + wm;
+ if( *wmrecords1pt > *wmrecordspt )
+ {
+ *wmrecordspt = *wmrecords1pt;
+ *warpipt = *(warpipt-1);
+ *warpjpt = *(warpjpt-1);
+ }
+ if( curm > *wmrecordspt )
+ {
+ *wmrecordspt = curm;
+ *warpipt = i;
+ *warpjpt = j;
+ }
+ wmrecordspt++;
+ wmrecords1pt++;
+ warpipt++;
+ warpjpt++;
+ }
#if 0
fprintf( stderr, "%5.0f ", wm );
curpt++;
fgcp2pt++;
ogcp2pt++;
+ gf2ptpre++;
+ gf2pt++;
+
}
lastverticalw[i] = currentw[lgth2-1];
+ if( trywarp )
+ {
+ fltncpy( prevwmrecords, wmrecords, lastj );
+ intncpy( prevwarpi, warpi, lastj );
+ intncpy( prevwarpj, warpj, lastj );
+ }
+ }
+ if( trywarp )
+ {
+// fprintf( stderr, "wm = %f\n", wm );
+// fprintf( stderr, "warpn = %d\n", warpn );
+ free( wmrecords );
+ free( prevwmrecords );
+ free( warpi );
+ free( warpj );
+ free( prevwarpi );
+ free( prevwarpj );
}
#if OUTGAP0TRY
*/
if( localhom )
{
- Atracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp, icyc, jcyc, start1, end1, start2, end2, gapmap1, gapmap2 );
+ Atracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, start1, end1, start2, end2, gapmap1, gapmap2, warpis, warpjs, warpbase );
}
else
- Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijp, icyc, jcyc );
+ Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, warpis, warpjs, warpbase );
+
+ if( warpis ) free( warpis );
+ if( warpjs ) free( warpjs );
// fprintf( stderr, "### impmatch = %f\n", *impmatch );
return( wm );
}
+/*
+ * Release the per-distance-class mask tables built at the top of
+ * partA__align_variousdist().  Each argument may be NULL.
+ */
+static void free_variousdist_masks( int **masklist1, int **masklist2, int *nmask )
+{
+	if( masklist1 ) freeintmtx( masklist1, maxdistclass );
+	if( masklist2 ) freeintmtx( masklist2, maxdistclass );
+	if( nmask ) free( nmask );
+}
+
+/*
+ * partA__align_variousdist
+ *
+ * Group-to-group dynamic-programming alignment of seq1 (icyc rows) against
+ * seq2 (jcyc rows) in which the match score is accumulated over maxdistclass
+ * scoring matrices: for every class c, match_calc_add() is called with
+ * matrices[c] and profiles built from the weights eff1s[c] / eff2s[c].
+ * Pairs (i,j) that carry non-zero weight in class c although which[i][j] != c
+ * are collected in mask lists and handed to match_calc_del().
+ *
+ * Special calls / early exits:
+ *   - seq1 == NULL : frees the cached static work arrays (if any) and
+ *                    returns 0.0.
+ *   - both groups have length 0 : returns 0.0 without touching anything.
+ *   - one group has length 0    : that group is filled with *newgapstr up to
+ *                                 the length of the other group; returns 0.0.
+ *   - trywarp set while outgap == 0 : prints a message and exit( 1 ).
+ *   - built with enablemultithread and *chudanpt != chudanref : sets
+ *     *chudanres = 1 and returns -1.0 (interrupted).
+ *
+ * On normal completion seq1 / seq2 are overwritten with the aligned rows
+ * produced by Atracking() / Atracking_localhom(), and the value of wm left by
+ * the last processed DP cell is returned.  ErrorExit() is called when the
+ * aligned length exceeds alloclen or N.
+ *
+ * The work arrays are static (TLS) and are only re-allocated when the input
+ * lengths exceed the lengths seen so far.
+ */
+double partA__align_variousdist( int **which, double ***matrices, double **n_dynamicmtx, char **seq1, char **seq2, double *eff1, double *eff2, double **eff1s, double **eff2s, int icyc, int jcyc, int alloclen, LocalHom ***localhom, double *impmatch, int start1, int end1, int start2, int end2, int *gapmap1, int *gapmap2, char *sgap1, char *sgap2, char *egap1, char *egap2, int *chudanpt, int chudanref, int *chudanres )
+/* Known issue: gaps that already exist in the input are not handled properly when the score is computed. */
+{
+//	int k;
+	register int i, j, c;
+	int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
+	int lgth1, lgth2;
+	int resultlen;
+	double wm = 0.0;  /* int ?????? */
+	double g;
+	double *currentw, *previousw;
+#if 1
+	double *wtmp;
+	int *ijppt;
+	double *mjpt, *prept, *curpt;
+	int *mpjpt;
+#endif
+	static TLS double mi, *m;
+	static TLS int **ijp;
+	static TLS int mpi, *mp;
+	static TLS double *w1, *w2;
+	static TLS double *match;
+	static TLS double *initverticalw;    /* could be eliminated with some more work */
+	static TLS double *lastverticalw;    /* could be eliminated with some more work */
+	static TLS char **mseq1;
+	static TLS char **mseq2;
+	static TLS char **mseq;
+	static TLS double *ogcp1;
+	static TLS double *ogcp2;
+	static TLS double *fgcp1;
+	static TLS double *fgcp2;
+	static TLS double ***cpmx1s;
+	static TLS double ***cpmx2s;
+	static TLS double *gapfreq1;
+	static TLS double *gapfreq2;
+	static TLS int ***intwork;
+	static TLS double ***doublework;
+	static TLS int orlgth1 = 0, orlgth2 = 0;
+	double fpenalty = (double)penalty;
+	double fpenalty_shift = (double)penalty_shift;
+#if USE_PENALTY_EX
+	double fpenalty_ex = (double)penalty_ex;
+#endif
+	double *fgcp2pt;
+	double *ogcp2pt;
+	double fgcp1va;
+	double ogcp1va;
+	double *gf2pt;
+	double *gf2ptpre;
+	double gf1va;
+	double gf1vapre;
+	double headgapfreq1;
+	double headgapfreq2;
+
+	int *warpis = NULL;
+	int *warpjs = NULL;
+	int *warpi = NULL;
+	int *warpj = NULL;
+	int *prevwarpi = NULL;
+	int *prevwarpj = NULL;
+	double *wmrecords = NULL;
+	double *prevwmrecords = NULL;
+	int warpn = 0;
+	int warpbase;
+	double curm = 0.0;
+	double *wmrecordspt, *wmrecords1pt, *prevwmrecordspt;
+	int *warpipt, *warpjpt;
+	int *nmask, **masklist1, **masklist2;
+
+
+	/* Cleanup request: release the cached work arrays. */
+	if( seq1 == NULL )
+	{
+		if( orlgth1 )
+		{
+//			fprintf( stderr, "## Freeing local arrays in A__align\n" );
+			orlgth1 = 0;
+			orlgth2 = 0;
+
+			part_imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL );
+
+			free( mseq1 );
+			free( mseq2 );
+			FreeFloatVec( w1 );
+			FreeFloatVec( w2 );
+			FreeFloatVec( match );
+			FreeFloatVec( initverticalw );
+			FreeFloatVec( lastverticalw );
+
+			FreeFloatVec( m );
+			FreeIntVec( mp );
+
+			FreeCharMtx( mseq );
+
+			FreeFloatVec( ogcp1 );
+			FreeFloatVec( ogcp2 );
+			FreeFloatVec( fgcp1 );
+			FreeFloatVec( fgcp2 );
+
+
+			FreeFloatCub( cpmx1s );
+			FreeFloatCub( cpmx2s );
+
+			FreeFloatVec( gapfreq1 );
+			FreeFloatVec( gapfreq2 );
+
+			FreeFloatCub( doublework );
+			FreeIntCub( intwork );
+
+		}
+		else
+		{
+//			fprintf( stderr, "## Not allocated\n" );
+		}
+		return( 0.0 );
+	}
+
+	/*
+	 * For each class c, list the (i,j) pairs that have non-zero weight in c
+	 * but are assigned to a different class by which[][].
+	 */
+	masklist1 = AllocateIntMtx( maxdistclass, 0 );
+	masklist2 = AllocateIntMtx( maxdistclass, 0 );
+	nmask = calloc( maxdistclass, sizeof( int ) );
+
+	for( c=0; c<maxdistclass; c++ )
+	{
+		for( i=0; i<icyc; i++ ) for( j=0; j<jcyc; j++ )
+		{
+			if( eff1s[c][i] * eff2s[c][j] != 0.0 )
+			{
+
+				if( c != which[i][j] )
+				{
+					masklist1[c] = realloc( masklist1[c], sizeof( int ) * (nmask[c]+1) );
+					masklist2[c] = realloc( masklist2[c], sizeof( int ) * (nmask[c]+1) );
+
+					masklist1[c][nmask[c]] = i;
+					masklist2[c][nmask[c]] = j;
+					nmask[c]++;
+				}
+			}
+		}
+	}
+	for( c=0; c<maxdistclass; c++ ) if( nmask[c] ) break;
+	if( c<maxdistclass ) reporterr( "Found a complex grouping. This step may be a bit slow.\n" );
+
+	lgth1 = strlen( seq1[0] );
+	lgth2 = strlen( seq2[0] );
+#if 1
+//	if( lgth1 == 0 ) fprintf( stderr, "WARNING: lgth1=0 in partA__align\n" );
+//	if( lgth2 == 0 ) fprintf( stderr, "WARNING: lgth2=0 in partA__align\n" );
+
+	/* Degenerate inputs.  The mask tables must be released on every one of these exits. */
+	if( lgth1 == 0 && lgth2 == 0 )
+	{
+		free_variousdist_masks( masklist1, masklist2, nmask );
+		return( 0.0 );
+	}
+
+	if( lgth1 == 0 )
+	{
+		for( i=0; i<icyc; i++ )
+		{
+			j = lgth2;
+			seq1[i][j] = 0;
+			while( j ) seq1[i][--j] = *newgapstr;
+//			fprintf( stderr, "seq1[i] = %s\n", seq1[i] );
+		}
+		free_variousdist_masks( masklist1, masklist2, nmask );
+		return( 0.0 );
+	}
+
+	if( lgth2 == 0 )
+	{
+		for( i=0; i<jcyc; i++ )
+		{
+			j = lgth1;
+			seq2[i][j] = 0;
+			while( j ) seq2[i][--j] = *newgapstr;
+//			fprintf( stderr, "seq2[i] = %s\n", seq2[i] );
+		}
+		free_variousdist_masks( masklist1, masklist2, nmask );
+		return( 0.0 );
+	}
+#endif
+
+	/* Values >= warpbase stored in ijp[][] index into warpis[] / warpjs[]. */
+	warpbase = lgth1 + lgth2;
+	warpis = NULL;
+	warpjs = NULL;
+	warpn = 0;
+
+
+	if( trywarp )
+	{
+//		fprintf( stderr, "IN partA__align_variousdist\n" );
+		if( outgap == 0 )
+		{
+			fprintf( stderr, "At present, outgap must be 1 to allow shift.\n" );
+			exit( 1 );
+		}
+		wmrecords = AllocateFloatVec( lgth2+1 );
+		warpi = AllocateIntVec( lgth2+1 );
+		warpj = AllocateIntVec( lgth2+1 );
+		prevwmrecords = AllocateFloatVec( lgth2+1 );
+		prevwarpi = AllocateIntVec( lgth2+1 );
+		prevwarpj = AllocateIntVec( lgth2+1 );
+		for( i=0; i<lgth2+1; i++ ) wmrecords[i] = 0.0;
+		for( i=0; i<lgth2+1; i++ ) prevwmrecords[i] = 0.0;
+		for( i=0; i<lgth2+1; i++ ) prevwarpi[i] = -warpbase;
+		for( i=0; i<lgth2+1; i++ ) prevwarpj[i] = -warpbase;
+		for( i=0; i<lgth2+1; i++ ) warpi[i] = -warpbase;
+		for( i=0; i<lgth2+1; i++ ) warpj[i] = -warpbase;
+	}
+
+#if 0
+	fprintf( stderr, "eff in SA+++align\n" );
+	for( i=0; i<icyc; i++ ) fprintf( stderr, "eff1[%d] = %f\n", i, eff1[i] );
+#endif
+	if( orlgth1 == 0 )
+	{
+		mseq1 = AllocateCharMtx( njob, 0 );
+		mseq2 = AllocateCharMtx( njob, 0 );
+	}
+
+
+
+
+	/* (Re)allocate the cached work arrays when the inputs outgrow them. */
+	if( lgth1 > orlgth1 || lgth2 > orlgth2 )
+	{
+		int ll1, ll2;
+
+		if( orlgth1 > 0 && orlgth2 > 0 )
+		{
+			FreeFloatVec( w1 );
+			FreeFloatVec( w2 );
+			FreeFloatVec( match );
+			FreeFloatVec( initverticalw );
+			FreeFloatVec( lastverticalw );
+
+			FreeFloatVec( m );
+			FreeIntVec( mp );
+
+			FreeCharMtx( mseq );
+
+			FreeFloatVec( ogcp1 );
+			FreeFloatVec( ogcp2 );
+			FreeFloatVec( fgcp1 );
+			FreeFloatVec( fgcp2 );
+
+
+			FreeFloatCub( cpmx1s );
+			FreeFloatCub( cpmx2s );
+
+			FreeFloatVec( gapfreq1 );
+			FreeFloatVec( gapfreq2 );
+
+			FreeFloatCub( doublework );
+			FreeIntCub( intwork );
+		}
+
+		ll1 = MAX( (int)(1.3*lgth1), orlgth1 ) + 100;
+		ll2 = MAX( (int)(1.3*lgth2), orlgth2 ) + 100;
+
+#if DEBUG
+		fprintf( stderr, "\ntrying to allocate (%d+%d)xn matrices ... ", ll1, ll2 );
+#endif
+
+		w1 = AllocateFloatVec( ll2+2 );
+		w2 = AllocateFloatVec( ll2+2 );
+		match = AllocateFloatVec( ll2+2 );
+
+		initverticalw = AllocateFloatVec( ll1+2 );
+		lastverticalw = AllocateFloatVec( ll1+2 );
+
+		m = AllocateFloatVec( ll2+2 );
+		mp = AllocateIntVec( ll2+2 );
+
+		mseq = AllocateCharMtx( njob, ll1+ll2 );
+
+		ogcp1 = AllocateFloatVec( ll1+2 );
+		ogcp2 = AllocateFloatVec( ll2+2 );
+		fgcp1 = AllocateFloatVec( ll1+2 );
+		fgcp2 = AllocateFloatVec( ll2+2 );
+
+		cpmx1s = AllocateFloatCub( maxdistclass, nalphabets, ll1+2 );
+		cpmx2s = AllocateFloatCub( maxdistclass, nalphabets, ll2+2 );
+
+		gapfreq1 = AllocateFloatVec( ll1+2 );
+		gapfreq2 = AllocateFloatVec( ll2+2 );
+
+		doublework = AllocateFloatCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets );
+		intwork = AllocateIntCub( maxdistclass, MAX( ll1, ll2 )+2, nalphabets );
+
+#if DEBUG
+		fprintf( stderr, "succeeded\n" );
+#endif
+
+		orlgth1 = ll1 - 100;
+		orlgth2 = ll2 - 100;
+	}
+
+
+	for( i=0; i<icyc; i++ ) mseq1[i] = mseq[i];
+	for( j=0; j<jcyc; j++ ) mseq2[j] = mseq[icyc+j];
+
+
+	/* The traceback matrix is shared (commonIP) and grown on demand. */
+	if( orlgth1 > commonAlloc1 || orlgth2 > commonAlloc2 )
+	{
+		int ll1, ll2;
+
+		if( commonAlloc1 && commonAlloc2 )
+		{
+			FreeIntMtx( commonIP );
+		}
+
+		ll1 = MAX( orlgth1, commonAlloc1 );
+		ll2 = MAX( orlgth2, commonAlloc2 );
+
+#if DEBUG
+		fprintf( stderr, "\n\ntrying to allocate %dx%d matrices ... ", ll1+1, ll2+1 );
+#endif
+
+		commonIP = AllocateIntMtx( ll1+10, ll2+10 );
+
+#if DEBUG
+		fprintf( stderr, "succeeded\n\n" );
+#endif
+
+		commonAlloc1 = ll1;
+		commonAlloc2 = ll2;
+	}
+	ijp = commonIP;
+
+//	cpmx_calc_new( seq1, cpmx1, eff1, lgth1, icyc );
+//	cpmx_calc_new( seq2, cpmx2, eff2, lgth2, jcyc );
+	for( c=0; c<maxdistclass; c++ )
+	{
+		cpmx_calc_new( seq1, cpmx1s[c], eff1s[c], lgth1, icyc );
+		cpmx_calc_new( seq2, cpmx2s[c], eff2s[c], lgth2, jcyc );
+	}
+
+	if( sgap1 )
+	{
+		new_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1, sgap1 );
+		new_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2, sgap2 );
+		new_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1, egap1 );
+		new_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2, egap2 );
+		outgapcount( &headgapfreq1, icyc, sgap1, eff1 );
+		outgapcount( &headgapfreq2, jcyc, sgap2, eff2 );
+		outgapcount( gapfreq1+lgth1, icyc, egap1, eff1 );
+		outgapcount( gapfreq2+lgth2, jcyc, egap2, eff2 );
+	}
+	else
+	{
+		st_OpeningGapCount( ogcp1, icyc, seq1, eff1, lgth1 );
+		st_OpeningGapCount( ogcp2, jcyc, seq2, eff2, lgth2 );
+		st_FinalGapCount( fgcp1, icyc, seq1, eff1, lgth1 );
+		st_FinalGapCount( fgcp2, jcyc, seq2, eff2, lgth2 );
+		headgapfreq1 = 0.0;
+		headgapfreq2 = 0.0;
+		gapfreq1[lgth1] = 0.0;
+		gapfreq2[lgth2] = 0.0;
+	}
+
+	/* From here on gapfreq*[] and headgapfreq* hold (1 - gap frequency), or 1.0 with legacygapcost. */
+	if( legacygapcost == 0 )
+	{
+		gapcountf( gapfreq1, seq1, icyc, eff1, lgth1 );
+		gapcountf( gapfreq2, seq2, jcyc, eff2, lgth2 );
+		for( i=0; i<lgth1+1; i++ ) gapfreq1[i] = 1.0 - gapfreq1[i];
+		for( i=0; i<lgth2+1; i++ ) gapfreq2[i] = 1.0 - gapfreq2[i];
+		headgapfreq1 = 1.0 - headgapfreq1;
+		headgapfreq2 = 1.0 - headgapfreq2;
+	}
+	else
+	{
+		for( i=0; i<lgth1+1; i++ ) gapfreq1[i] = 1.0;
+		for( i=0; i<lgth2+1; i++ ) gapfreq2[i] = 1.0;
+		headgapfreq1 = 1.0;
+		headgapfreq2 = 1.0;
+	}
+
+	/* Turn the opening/final gap counts into position-specific gap penalties. */
+	for( i=0; i<lgth1; i++ )
+	{
+		ogcp1[i] = 0.5 * ( 1.0 - ogcp1[i] ) * fpenalty * ( gapfreq1[i] );
+		fgcp1[i] = 0.5 * ( 1.0 - fgcp1[i] ) * fpenalty * ( gapfreq1[i] );
+	}
+	for( i=0; i<lgth2; i++ )
+	{
+		ogcp2[i] = 0.5 * ( 1.0 - ogcp2[i] ) * fpenalty * ( gapfreq2[i] );
+		fgcp2[i] = 0.5 * ( 1.0 - fgcp2[i] ) * fpenalty * ( gapfreq2[i] );
+	}
+#if 0
+	for( i=0; i<lgth1; i++ )
+		fprintf( stderr, "ogcp1[%d]=%f\n", i, ogcp1[i] );
+#endif
+
+	currentw = w1;
+	previousw = w2;
+
+
+	/* First column of the DP matrix. */
+//	match_calc( initverticalw, cpmx2, cpmx1, 0, lgth1, doublework, intwork, 1 );
+	fillzero( initverticalw, lgth1 );
+	for( c=0; c<maxdistclass; c++ )
+	{
+		match_calc_add( matrices[c], initverticalw, cpmx2s[c], cpmx1s[c], 0, lgth1, doublework[c], intwork[c], 1 );
+		if( nmask[c] ) match_calc_del( which, matrices, initverticalw, jcyc, seq2, eff2, icyc, seq1, eff1, 0, lgth1, c, nmask[c], masklist2[c], masklist1[c] );
+	}
+
+	if( localhom )
+		part_imp_match_out_vead_tate_gapmap( initverticalw, gapmap2[0]+start2, lgth1, start1, gapmap1 );
+
+
+	/* First row of the DP matrix. */
+//	match_calc( currentw, cpmx1, cpmx2, 0, lgth2, doublework, intwork, 1 );
+	fillzero( currentw, lgth2 );
+	for( c=0; c<maxdistclass; c++ )
+	{
+		match_calc_add( matrices[c], currentw, cpmx1s[c], cpmx2s[c], 0, lgth2, doublework[c], intwork[c], 1 );
+		if( nmask[c] ) match_calc_del( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, 0, lgth2, c, nmask[c], masklist1[c], masklist2[c] );
+	}
+	if( localhom )
+		part_imp_match_out_vead_gapmap( currentw, gapmap1[0]+start1, lgth2, start2, gapmap2 );
+#if 0 // -> tbfast.c
+	if( localhom )
+		imp_match_calc( currentw, icyc, jcyc, lgth1, lgth2, seq1, seq2, eff1, eff2, localhom, 1, 0 );
+
+#endif
+
+	if( outgap == 1 )
+	{
+		for( i=1; i<lgth1+1; i++ )
+		{
+//			initverticalw[i] += ( ogcp1[0] + fgcp1[i-1] ) ;
+			initverticalw[i] += ( ogcp1[0] * headgapfreq2 + fgcp1[i-1] * gapfreq2[0] ) ;
+		}
+		for( j=1; j<lgth2+1; j++ )
+		{
+//			currentw[j] += ( ogcp2[0] + fgcp2[j-1] ) ;
+			currentw[j] += ( ogcp2[0] * headgapfreq1 + fgcp2[j-1] * gapfreq1[0] ) ;
+		}
+	}
+#if OUTGAP0TRY
+	else
+	{
+		for( j=1; j<lgth2+1; j++ )
+			currentw[j] -= offset * j / 2.0;
+		for( i=1; i<lgth1+1; i++ )
+			initverticalw[i] -= offset * i / 2.0;
+	}
+#endif
+
+	for( j=1; j<lgth2+1; ++j )
+	{
+//		m[j] = currentw[j-1] + ogcp1[1]; mp[j] = 0;
+		m[j] = currentw[j-1] + ogcp1[1] * gapfreq2[j-1]; mp[j] = 0;
+	}
+
+	lastverticalw[0] = currentw[lgth2-1];
+
+	if( outgap ) lasti = lgth1+1; else lasti = lgth1;
+	lastj = lgth2+1;
+
+#if XXXXXXX
+fprintf( stderr, "currentw = \n" );
+for( i=0; i<lgth1+1; i++ )
+{
+	fprintf( stderr, "%5.2f ", currentw[i] );
+}
+fprintf( stderr, "\n" );
+fprintf( stderr, "initverticalw = \n" );
+for( i=0; i<lgth2+1; i++ )
+{
+	fprintf( stderr, "%5.2f ", initverticalw[i] );
+}
+fprintf( stderr, "\n" );
+fprintf( stderr, "fcgp\n" );
+for( i=0; i<lgth1; i++ )
+	fprintf( stderr, "fgcp1[%d]=%f\n", i, ogcp1[i] );
+for( i=0; i<lgth2; i++ )
+	fprintf( stderr, "fgcp2[%d]=%f\n", i, ogcp2[i] );
+#endif
+
+	/* Main DP: one row (position i of group 1) per iteration. */
+	for( i=1; i<lasti; i++ )
+	{
+
+#ifdef enablemultithread
+//		fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+		/* Interruption requested: release everything owned by this call and bail out. */
+		if( chudanpt && *chudanpt != chudanref )
+		{
+//			fprintf( stderr, "\n\n## CHUUDAN!!! i\n" );
+			*chudanres = 1;
+			free_variousdist_masks( masklist1, masklist2, nmask );
+			/* All of these are NULL unless trywarp allocated them; free( NULL ) is a no-op. */
+			free( wmrecords );
+			free( prevwmrecords );
+			free( warpi );
+			free( warpj );
+			free( prevwarpi );
+			free( prevwarpj );
+			free( warpis );
+			free( warpjs );
+			return( -1.0 );
+		}
+#endif
+
+		wtmp = previousw;
+		previousw = currentw;
+		currentw = wtmp;
+
+		previousw[0] = initverticalw[i-1];
+
+//		match_calc( currentw, cpmx1, cpmx2, i, lgth2, doublework, intwork, 0 );
+		fillzero( currentw, lgth2 );
+		for( c=0; c<maxdistclass; c++ )
+		{
+			match_calc_add( matrices[c], currentw, cpmx1s[c], cpmx2s[c], i, lgth2, doublework[c], intwork[c], 0 );
+			if( nmask[c] ) match_calc_del( which, matrices, currentw, icyc, seq1, eff1, jcyc, seq2, eff2, i, lgth2, c, nmask[c], masklist1[c], masklist2[c] );
+		}
+#if 0
+fprintf( stderr, "\n" );
+fprintf( stderr, "i=%d\n", i );
+fprintf( stderr, "currentw before imp = \n" );
+for( j=0; j<lgth2; j++ )
+{
+	fprintf( stderr, "%5.2f ", currentw[j] );
+}
+fprintf( stderr, "\n" );
+#endif
+		if( localhom )
+		{
+//			fprintf( stderr, "Calling imp_match_calc (o) lgth = %d, i = %d\n", lgth1, i );
+//			imp_match_out_vead( currentw, i, lgth2 );
+			part_imp_match_out_vead_gapmap( currentw, gapmap1[i]+start1, lgth2, start2, gapmap2 );
+		}
+#if 0
+fprintf( stderr, "specificity = %f\n", specificityconsideration );
+fprintf( stderr, "i=%d\n", i );
+fprintf( stderr, "currentw = \n" );
+for( j=0; j<lgth2; j++ )
+{
+	fprintf( stderr, "%5.2f ", currentw[j] );
+}
+fprintf( stderr, "\n" );
+#endif
+		currentw[0] = initverticalw[i];
+
+
+//		mi = previousw[0] + ogcp2[1]; mpi = 0;
+		mi = previousw[0] + ogcp2[1] * gapfreq1[i-1]; mpi=0;
+
+		ijppt = ijp[i] + 1;
+		mjpt = m + 1;
+		prept = previousw;
+		curpt = currentw + 1;
+		mpjpt = mp + 1;
+		fgcp2pt = fgcp2;
+		ogcp2pt = ogcp2+1;
+		fgcp1va = fgcp1[i-1];
+		ogcp1va = ogcp1[i];
+		gf1va = gapfreq1[i];
+		gf1vapre = gapfreq1[i-1];
+		gf2pt = gapfreq2+1;
+		gf2ptpre = gapfreq2;
+
+		if( trywarp )
+		{
+			prevwmrecordspt = prevwmrecords;
+			wmrecordspt = wmrecords+1;
+			wmrecords1pt = wmrecords;
+			warpipt = warpi + 1;
+			warpjpt = warpj + 1;
+		}
+
+		for( j=1; j<lastj; j++ )
+		{
+#ifdef xxxenablemultithread
+//			fprintf( stderr, "chudan = %d, %d\n", *chudanpt, chudanref );
+			if( chudanpt && *chudanpt != chudanref )
+			{
+//				fprintf( stderr, "\n\n## CHUUDAN!!! j\n" );
+				*chudanres = 1;
+				free_variousdist_masks( masklist1, masklist2, nmask );
+				free( wmrecords );
+				free( prevwmrecords );
+				free( warpi );
+				free( warpj );
+				free( prevwarpi );
+				free( prevwarpj );
+				free( warpis );
+				free( warpjs );
+				return( -1.0 );
+			}
+#endif
+			/* Start from the diagonal predecessor; *ijppt == 0 encodes a diagonal step. */
+			wm = *prept;
+			*ijppt = 0;
+
+#if 0
+			fprintf( stderr, "%5.0f->", wm );
+#endif
+			/* Candidate read from the running value mi; a negative *ijppt records the column distance j - mpi. */
+//			g = mi + *fgcp2pt * gapfreq1[i];
+			if( (g = mi + *fgcp2pt * gf1va) > wm )
+			{
+				wm = g;
+				*ijppt = -( j - mpi );
+			}
+//			g = *prept + *ogcp2pt * gapfreq1[i-1];
+			if( (g = *prept + *ogcp2pt * gf1vapre) >= mi )
+			{
+				mi = g;
+				mpi = j-1;
+			}
+#if USE_PENALTY_EX
+			mi += fpenalty_ex;
+#endif
+
+			/* Candidate read from the per-column value m[j]; a positive *ijppt records the row distance i - mp[j]. */
+//			g = *mjpt + fgcp1va * gapfreq2[j];
+			if( (g = *mjpt + fgcp1va * *gf2pt) > wm )
+			{
+				wm = g;
+				*ijppt = +( i - *mpjpt );
+			}
+//			g = *prept + ogcp1va * gapfreq2[j-1];
+			if( (g = *prept + ogcp1va * *gf2ptpre) >= *mjpt )
+			{
+				*mjpt = g;
+				*mpjpt = i-1;
+			}
+#if USE_PENALTY_EX
+			m[j] += fpenalty_ex;
+#endif
+			if( trywarp )
+			{
+				/* Original note on the next test: the body of this branch is allowed to be slow. */
+#if USE_PENALTY_EX
+				if( ( g=*prevwmrecordspt++ + fpenalty_shift + fpenalty_ex * ( i - prevwarpi[j-1] + j - prevwarpj[j-1] ) ) > wm )
+#else
+				if( ( g=*prevwmrecordspt++ + fpenalty_shift ) > wm )
+#endif
+				{
+					/* Reuse the last warp record when it has the same origin, otherwise append a new one. */
+					if( warpn && prevwarpi[j-1] == warpis[warpn-1] && prevwarpj[j-1] == warpjs[warpn-1] )
+					{
+						*ijppt = warpbase + warpn - 1;
+					}
+					else
+					{
+						*ijppt = warpbase + warpn;
+						warpis = realloc( warpis, sizeof(int) * ( warpn+1 ) );
+						warpjs = realloc( warpjs, sizeof(int) * ( warpn+1 ) );
+						warpis[warpn] = prevwarpi[j-1];
+						warpjs[warpn] = prevwarpj[j-1];
+						warpn++;
+					}
+					wm = g;
+				}
+
+				/* Keep, per column, the best score seen so far in this row and where it occurred. */
+				curm = *curpt + wm;
+				if( *wmrecords1pt > *wmrecordspt )
+				{
+					*wmrecordspt = *wmrecords1pt;
+					*warpipt = *(warpipt-1);
+					*warpjpt = *(warpjpt-1);
+				}
+				if( curm > *wmrecordspt )
+				{
+					*wmrecordspt = curm;
+					*warpipt = i;
+					*warpjpt = j;
+				}
+				wmrecordspt++;
+				wmrecords1pt++;
+				warpipt++;
+				warpjpt++;
+			}
+
+#if 0
+			fprintf( stderr, "%5.0f ", wm );
+#endif
+			*curpt += wm;
+			ijppt++;
+			mjpt++;
+			prept++;
+			mpjpt++;
+			curpt++;
+			fgcp2pt++;
+			ogcp2pt++;
+			gf2ptpre++;
+			gf2pt++;
+
+		}
+		lastverticalw[i] = currentw[lgth2-1];
+
+		if( trywarp )
+		{
+			fltncpy( prevwmrecords, wmrecords, lastj );
+			intncpy( prevwarpi, warpi, lastj );
+			intncpy( prevwarpj, warpj, lastj );
+		}
+	}
+	if( trywarp )
+	{
+//		fprintf( stderr, "wm = %f\n", wm );
+//		fprintf( stderr, "warpn = %d\n", warpn );
+		free( wmrecords );
+		free( prevwmrecords );
+		free( warpi );
+		free( warpj );
+		free( prevwarpi );
+		free( prevwarpj );
+	}
+
+#if OUTGAP0TRY
+	if( !outgap )
+	{
+		for( j=1; j<lgth2+1; j++ )
+			currentw[j] -= offset * ( lgth2 - j ) / 2.0;
+		/* Parenthesised like the lgth2 line above; previously only i was divided by 2.0. */
+		for( i=1; i<lgth1+1; i++ )
+			lastverticalw[i] -= offset * ( lgth1 - i ) / 2.0;
+	}
+#endif
+
+	/*
+	fprintf( stderr, "\n" );
+	for( i=0; i<icyc; i++ ) fprintf( stderr,"%s\n", seq1[i] );
+	fprintf( stderr, "#####\n" );
+	for( j=0; j<jcyc; j++ ) fprintf( stderr,"%s\n", seq2[j] );
+	fprintf( stderr, "====>" );
+	for( i=0; i<icyc; i++ ) strcpy( mseq1[i], seq1[i] );
+	for( j=0; j<jcyc; j++ ) strcpy( mseq2[j], seq2[j] );
+	*/
+	/* Traceback: writes the aligned rows into mseq1 / mseq2. */
+	if( localhom )
+	{
+		Atracking_localhom( impmatch, currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, start1, end1, start2, end2, gapmap1, gapmap2, warpis, warpjs, warpbase );
+	}
+	else
+		Atracking( currentw, lastverticalw, seq1, seq2, mseq1, mseq2, ijp, icyc, jcyc, warpis, warpjs, warpbase );
+
+	if( warpis ) free( warpis );
+	if( warpjs ) free( warpjs );
+
+//	fprintf( stderr, "### impmatch = %f\n", *impmatch );
+
+	resultlen = strlen( mseq1[0] );
+	if( alloclen < resultlen || resultlen > N )
+	{
+		fprintf( stderr, "alloclen=%d, resultlen=%d, N=%d\n", alloclen, resultlen, N );
+		ErrorExit( "LENGTH OVER!\n" );
+	}
+
+
+	for( i=0; i<icyc; i++ ) strcpy( seq1[i], mseq1[i] );
+	for( j=0; j<jcyc; j++ ) strcpy( seq2[j], mseq2[j] );
+	/*
+	fprintf( stderr, "\n" );
+	for( i=0; i<icyc; i++ ) fprintf( stderr, "%s\n", mseq1[i] );
+	fprintf( stderr, "#####\n" );
+	for( j=0; j<jcyc; j++ ) fprintf( stderr, "%s\n", mseq2[j] );
+	*/
+
+	free_variousdist_masks( masklist1, masklist2, nmask );
+
+	return( wm );
+}
--- /dev/null
+#! /usr/bin/env ruby
+
+$MAFFTCOMMAND = '"/usr/local/bin/mafft"'
+# Edit the above line to specify the location of mafft.
+# $MAFFTCOMMAND = '"C:\folder name\mafft.bat"' # windows
+# $MAFFTCOMMAND = '"/usr/local/bin/mafft"' # mac or cygwin
+# $MAFFTCOMMAND = '"/usr/bin/mafft"' # linux (rpm)
+# $MAFFTCOMMAND = '"/somewhere/mafft.bat"' # all-in-one version for linux or mac
+
+#####################################################################
+#
+# regionalrealignment.rb version 0.2 (2013/Sep/21)
+# ruby regionalrealignment.rb setting input > output
+# See http://mafft.cbrc.jp/alignment/software/regionalrealignment.html
+#
+# v0.2, 2013/Sep/21, Fixed a windows-specific bug.
+#
+#####################################################################
+
+
+def readfasta( fp, name, seq )
+ nseq = 0
+ tmpseq = ""
+ while fp.gets
+ if $_ =~ /^>/ then
+ name.push( $_.sub(/>/,"").strip )
+ seq.push( tmpseq ) if nseq > 0
+ nseq += 1
+ tmpseq = ""
+ else
+ tmpseq += $_.strip
+ end
+ end
+ seq.push( tmpseq )
+ return nseq
+end
+
+def resolve( tree )
+ while 1
+# p tree
+ tree.sub!( /\,([0-9]+):(\-?[0-9\.]+)\,([0-9]+):(\-?[0-9\.]+)/, ",XXX" )
+ hit1 = $1
+ hit2 = $2
+ hit3 = $3
+ hit4 = $4
+
+# p hit1
+# p hit2
+# p hit3
+# p hit4
+
+# puts "introduce XXX"
+# p tree
+
+ break unless tree.index(/XXX/)
+
+ poshit = tree.index(/XXX/)
+# puts "poshit=" + poshit.to_s
+
+ i = poshit
+ height = 0
+ while i >= 0
+ break if height == 0 && tree[i..i] == '('
+ if tree[i..i] == ')' then
+ height += 1
+ elsif tree[i..i] == '(' then
+ height -= 1
+ end
+ i -= 1
+ end
+
+ poskakko = i
+# puts "poskakko = " + poskakko.to_s
+ zenhan = tree[0..poskakko]
+ zenhan = "" if poskakko == -1
+# puts "zenhan = " + zenhan
+
+ treelen = tree.length
+ tree = zenhan + "(" + tree[poskakko+1..treelen]
+# puts "add ("
+# p tree
+ tree.sub!( /XXX/, "#{hit1}:#{hit2}):0,#{hit3}:#{hit4}" )
+
+# p tree
+end
+
+
+return tree
+
+end
+
+# ---------------------------------------------------------------------------
+# Main script.
+#   ARGV[0] : setting file; one region per line,
+#               "start end realign|preserve [mafft options]",
+#             plus an optional "treeoption ..." line.
+#   ARGV[1] : input alignment in FASTA format.
+# The concatenated regions are written to STDOUT, messages to STDERR.
+# Temporary files _dum, _tree, _part and _partout are created in the current
+# directory and removed at the end.
+# ---------------------------------------------------------------------------
+if ARGV.length != 2 then
+	STDERR.puts ""
+	STDERR.puts "Usage: ruby #{$0} setingfile inputfile > output"
+	STDERR.puts ""
+	exit 1
+end
+
+# --- read and validate the input alignment ---------------------------------
+infilename = ARGV[1]
+tname = []
+tseq = []
+infp = File.open( infilename, "r" )
+tin = readfasta( infp, tname, tseq )
+infp.close
+
+if tin == 0 then
+	STDERR.puts ""
+	STDERR.puts "Error in the '#{infilename}' file.  Is this FASTA format?\n"
+	STDERR.puts ""
+	exit 1
+end
+
+alnlen = tseq[0].length
+if alnlen == 0 then
+	STDERR.puts ""
+	STDERR.puts "Error in the '#{infilename}' file.  Is this FASTA format?\n"
+	STDERR.puts ""
+	exit 1
+end
+
+
+for i in 0..(tin-1)
+	if alnlen != tseq[i].length then
+		STDERR.puts ""
+		STDERR.puts "Please insert gaps such that all the input sequences have the same length.\n"
+		STDERR.puts ""
+		exit 1
+	end
+end
+
+# checkmap[k] counts how many regions cover site k.
+checkmap = []
+for i in 0..(alnlen-1)
+	checkmap.push(0)
+end
+
+outputseq = []
+for i in 0..(tin-1)
+	outputseq.push("")
+end
+
+
+# --- read the setting file --------------------------------------------------
+settingfile = ARGV[0].to_s
+reg = []
+startpos = []
+endpos = []
+realign = []
+options = []
+treeoption = ""
+revwarn = 0
+# First pass: pick up the (first) treeoption line only.
+sfp = File.open( settingfile, "r" )
+while line = sfp.gets
+	line.sub!(/#.*/,"")
+	next if line.length < 2
+	if line.strip =~ /^treeoption / then
+		treeoption = line.strip.sub(/.*treeoption/,"")
+		break
+	end
+end
+sfp.close
+# Second pass: region definitions.
+sfp = File.open( settingfile, "r" )
+while line = sfp.gets
+	line.sub!(/#.*/,"")
+	next if line.length < 2
+	next if line.strip =~ /^treeoption/
+	# Positions are 1-based in the file and 0-based internally.
+	startposv = line.split(' ')[0].to_i - 1
+	endposv = line.split(' ')[1].to_i - 1
+	if startposv < 0 || endposv < 0 then
+		STDERR.puts "\nError in the '#{settingfile}' file. Please check this line:\n"
+		STDERR.puts line
+		STDERR.puts "Sites must be numbered as 1, 2, ...\n"
+		STDERR.puts "\n"
+		exit 1
+	end
+	if startposv >= alnlen || endposv >= alnlen then
+		STDERR.puts "\nError in the '#{settingfile}' file. Please check this line:\n"
+		STDERR.puts line
+		STDERR.puts "Sites must be numbered as 1, 2, ... #{alnlen}\n"
+		STDERR.puts "\n"
+		exit 1
+	end
+	if startposv > endposv then
+		STDERR.puts "\nWarning. Please check this line:\n"
+		STDERR.puts line
+		STDERR.puts "Start position > End position ?\n"
+		STDERR.puts "\n"
+		revwarn = 1
+#		exit 1
+	end
+	startpos.push( startposv )
+	endpos.push( endposv )
+	if startposv > endposv
+		for k in (endposv)..(startposv)
+			checkmap[k] += 1
+		end
+	else
+		for k in (startposv)..(endposv)
+			checkmap[k] += 1
+		end
+	end
+	if line.split(' ')[2] == "realign" then
+		realign.push( 1 )
+	elsif line.split(' ')[2] == "preserve" then
+		realign.push( 0 )
+	else
+		STDERR.puts "\n"
+		STDERR.puts "The third column must be 'realign' or 'preserve'\n"
+		STDERR.puts "Please check this line:\n"
+		STDERR.puts line
+		STDERR.puts "\n"
+		exit 1
+	end
+	# Region-specific options (anything after 'realign' when a " --" option is present)
+	# take precedence over the global treeoption.
+	if line =~ / \-\-/ && line =~ /realign/ then
+		options.push( line.sub(/.*realign/,"").strip )
+	else
+		options.push( treeoption )
+	end
+end
+sfp.close
+
+#p startpos
+#p endpos
+#p options
+
+
+# --- build the guide tree from the whole alignment --------------------------
+#res = system "#{$MAFFTCOMMAND} #{treeoption} --treeout --retree 0 --thread -1 #{infilename} > _dum"
+res = system "#{$MAFFTCOMMAND} #{treeoption} --treeout --retree 0 #{infilename} > _dum"
+
+# system returns false on a non-zero exit status and nil when the command
+# could not be executed at all; both are failures.
+if res != true then
+	STDERR.puts "\n"
+	STDERR.puts "ERROR in building a guide tree"
+	STDERR.puts "\n"
+	exit 1
+end
+
+treefp = File.open( "#{infilename}.tree", "r" )
+
+tree = ""
+while line = treefp.gets
+	tree += line.strip
+	break if tree =~ /;$/
+end
+treefp.close
+
+# Reduce leaf labels to their leading number, zero out tiny exponent-form
+# branch lengths, and drop bracketed annotations and blanks.
+tree = tree.gsub( /_.*?:/, ":" ).gsub(/[0-9]\.[0-9]*e-[0-9][0-9]/, "0").gsub(/\[.*?\]/,"").gsub(/ /, "")
+scale = 1.0
+mtreefp = File.open("_tree", "w")
+
+
+#STDERR.puts "Tree = " + tree
+
+memi = [-1,-1]
+leni = [-1,-1]
+
+# Repeatedly collapse one cherry "(a:x,b:y)" into its smaller member number
+# and write the merge to _tree, until no parenthesis is left.
+while tree.index( /\(/ )
+
+	tree = resolve( tree )
+
+	collapsed = tree.sub!( /\(([0-9]+):(\-?[0-9\.]+),([0-9]+):(\-?[0-9\.]+)\)/, "XXX" )
+	if collapsed == nil then
+		# Without a match the tree would never change and this loop would not end.
+		mtreefp.close
+		STDERR.puts ""
+		STDERR.puts "ERROR: cannot parse the guide tree '#{infilename}.tree'"
+		STDERR.puts ""
+		exit 1
+	end
+	memi[0] = $1.to_i
+	leni[0] = $2.to_f * scale
+	memi[1] = $3.to_i
+	leni[1] = $4.to_f * scale
+
+	if leni[0] > 10 || leni[1] > 10 then
+		STDERR.puts ""
+		STDERR.puts "Please check the scale of branch length!"
+		STDERR.puts "The unit of branch lengths must be 'substitution/site'"
+		STDERR.puts "If the unit is 'substition' in your tree, please"
+		STDERR.puts "use the scale argument,"
+		STDERR.puts "% newick2mafft scale in > out"
+		STDERR.puts "where scale = 1/(alignment length)"
+		STDERR.puts ""
+		exit 1
+	end
+
+#	STDERR.puts "subtree = " + $&
+
+	# The member with the smaller number is written first and represents the merged node.
+	if memi[1] < memi[0] then
+		memi.reverse!
+		leni.reverse!
+	end
+
+	tree.sub!( /XXX/, memi[0].to_s )
+
+#	STDERR.puts "Tree = " + tree
+
+	mtreefp.printf( "%5d %5d %10.5f %10.5f\n", memi[0], memi[1], leni[0], leni[1] )
+
+end
+
+
+mtreefp.close
+
+# --- process the regions in the order given ---------------------------------
+numreg = startpos.length
+
+for i in 0..(numreg-1)
+
+	partfp = File.open( "_part", "w" )
+	for j in 0..(tin-1)
+		partfp.puts ">" + tname[j]
+		if startpos[i] > endpos[i] then
+			# Reversed range: take the columns and reverse their order.
+			partfp.puts tseq[j][endpos[i]..startpos[i]].reverse
+		else
+			partfp.puts tseq[j][startpos[i]..endpos[i]]
+		end
+	end
+	partfp.close
+
+	if( realign[i] == 1 ) then
+		STDERR.puts "Aligning region #{startpos[i]+1} - #{endpos[i]+1}"
+		res = system "#{$MAFFTCOMMAND} #{options[i]} --inputorder --treein _tree _part > _partout"
+		if res != true then
+			STDERR.puts "\n"
+			STDERR.puts "ERROR in aligning region #{startpos[i]+1} - #{endpos[i]+1}"
+			STDERR.puts "Please check the option:"
+			STDERR.puts "#{options[i]}"
+			STDERR.puts "\n"
+			exit 1
+		end
+
+	else
+		STDERR.puts "Copying region #{startpos[i]+1} - #{endpos[i]+1}"
+#		system "cp _part _partout"
+		File.rename( "_part", "_partout" )
+	end
+
+	pname = []
+	pseq = []
+	partfp = File.open( "_partout", "r" )
+	pin = readfasta( partfp, pname, pseq )
+	partfp.close
+	if pin != tin then
+		# pseq[j] would be nil below; report the mismatch instead of crashing.
+		STDERR.puts "\n"
+		STDERR.puts "ERROR in region #{startpos[i]+1} - #{endpos[i]+1}"
+		STDERR.puts "Expected #{tin} sequences but found #{pin}."
+		STDERR.puts "\n"
+		exit 1
+	end
+	for j in 0..(tin-1)
+		outputseq[j] += pseq[j]
+	end
+end
+
+for j in 0..(tin-1)
+	puts ">" + tname[j]
+	puts outputseq[j]
+end
+
+STDERR.puts "Done."
+
+# --- summary and warnings ---------------------------------------------------
+numdupsites = checkmap.select{|x| x>1}.length
+if numdupsites > 0 then
+	STDERR.puts ""
+	STDERR.puts "#########################################################"
+	STDERR.puts "# Warning: #{numdupsites} sites were duplicatedly selected."
+	STDERR.puts "#########################################################"
+	STDERR.puts ""
+end
+
+numunselectedsites = checkmap.select{|x| x==0}.length
+if numunselectedsites > 0 then
+	STDERR.puts ""
+	STDERR.puts "#########################################################"
+	STDERR.puts "# Warning: #{numunselectedsites} sites were not selected."
+	STDERR.puts "#########################################################"
+	STDERR.puts ""
+end
+
+if revwarn == 1 then
+	STDERR.puts ""
+	STDERR.puts "#########################################################"
+	STDERR.puts "# Warning: The order of sites were reversed."
+	STDERR.puts "#########################################################"
+	STDERR.puts ""
+end
+
+
+STDERR.puts ""
+STDERR.puts "        Tree: computed with #{treeoption} --treeout "
+for i in 0..(numreg-1)
+	range = sprintf( "%6d - %6d", startpos[i]+1, endpos[i]+1 )
+	if realign[i] == 1 then
+		STDERR.puts "#{range}: realigned with #{options[i]} --treein (tree)"
+	else
+		STDERR.puts "#{range}: preserved"
+	end
+end
+STDERR.puts ""
+
+# --- remove temporary files -------------------------------------------------
+# _part no longer exists when the last region was 'preserve' (it was renamed
+# to _partout), and neither _part nor _partout exists when the setting file
+# lists no region, so only delete what is actually there.
+[ "_dum", "_tree", "_part", "_partout" ].each do |tmpfile|
+	File.delete( tmpfile ) if File.exist?( tmpfile )
+end
+
+
*noutpt = 0;
*loutpt = 0;
fgets( gett, 999, regfp );
+ reg[0][0] = reg[1][0] = reg[2][0] = reg[3][0] = reg[4][0] = 'n';
+ reg[0][1] = reg[1][1] = reg[2][1] = reg[3][1] = reg[4][1] = 0;
sscanf( gett, "%c %s %s %s %s %s", &cmem, reg[0], reg[1], reg[2], reg[3], reg[4] );
if( cmem != 'e' )
{
}
for( j=0; j<5; j++ )
{
+// reporterr( "reg[j]=%s\n", reg[j] );
sscanf( reg[j], "%d-%d-%c", regtable[0]+(j*2), regtable[0]+(j*2)+1, revtable[0]+j );
fprintf( stderr, "%d %d-%d\n", 0, regtable[0][j*2], regtable[0][j*2+1] );
startpos = regtable[0][j*2];
endpos = regtable[0][j*2+1];
+// reporterr( "startpod=%d, endpos=%d, *loutpt=%d\n", startpos, endpos, *loutpt );
if( startpos > endpos )
{
endpos = regtable[0][j*2];
seedoffset = 0;
inputfile = NULL;
+ dorp = NOTSPECIFIED;
while( --argc > 0 && (*++argv)[0] == '-' )
{
switch( c )
{
case 'o':
- seedoffset = atoi( *++argv );
+ seedoffset = myatoi( *++argv );
fprintf( stderr, "seedoffset = %d\n", seedoffset );
--argc;
goto nextoption;
fprintf( stderr, "inputfile = %s\n", inputfile );
--argc;
goto nextoption;
+ case 'D':
+ dorp = 'd';
+ break;
+ case 'P':
+ dorp = 'p';
+ break;
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
infp = stdin;
- dorp = NOTSPECIFIED;
+// dorp = NOTSPECIFIED;
getnumlen_casepreserve( infp, &nlenmin );
fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp );
}
else
{
- usual = "ATGCUatgcu-";
+ usual = "ATGCUatgcuBDHKMNRSVWYXbdhkmnrsvwyx-";
replace_unusual( njob, seq, usual, 'n', tolower );
}
for( i=0; i<njob; i++ )
{
- fprintf( stdout, ">%d_oripos_%s\n", i+seedoffset, name[i] );
+ fprintf( stdout, ">_os_%d_oe_%s\n", i+seedoffset, name[i]+1 );
fprintf( stdout, "%s\n", seq[i] );
}
#define DEBUG 0
char *alignmentfile;
+int keeplength;
static void fillorichar( int nseq, int *oripos, char **a, char **s )
{
char *pta, *pts;
for( i=0; i<nseq; i++ )
{
+
pta = a[i];
pts = s[oripos[i]];
while( *pta )
if( *pta++ == 0 )
{
fprintf( stderr, "ERROR!!\n" );
- fprintf( stderr, "alignment is inconsistent with the original sequences\n" );
+ fprintf( stderr, "alignment is inconsistent with the original sequences (1)\n" );
exit( 1 );
}
}
if( *pts != 0 )
{
fprintf( stderr, "ERROR!!\n" );
- fprintf( stderr, "alignment is inconsistent with the original sequences\n" );
+ fprintf( stderr, "alignment is inconsistent with the original sequences (2)\n" );
exit( 1 );
}
}
{
int c;
+ keeplength = 0;
+
while( --argc > 0 && (*++argv)[0] == '-' )
{
while ( (c = *++argv[0]) )
alignmentfile = *++argv;
--argc;
goto nextoption;
+ case 'Z': // add2ndharfarg wo tsukau tame.
+ break;
+ case 'p': // add2ndharfarg wo tsukau tame.
+ break;
+ case 'Y':
+ keeplength = 1;
+ break;
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
{
FILE *infp;
FILE *alfp;
+ FILE *dlfp;
char **name;
char **aname;
char **oname;
char **aseq;
int *nlen;
int *oripos;
- char *npt, *npt2;
+ char *npt, *npt0, *npt2, *pt, *pt2;
int i, o, prelen;
int nlenmin;
int njobs, njoba;
+// int **dlist;
+// int *ndel;
+ char *gett;
arguments( argc, argv );
fprintf( stderr, "No alignment is given.\n" );
exit( 1 );
}
+
+ if( keeplength )
+ {
+ dlfp = fopen( "_deletelist", "r" );
+ if( !dlfp )
+ {
+ fprintf( stderr, "Cannot open _deletefile\n" );
+ exit( 1 );
+ }
+ }
dorp = NOTSPECIFIED;
getnumlen_casepreserve( infp, &nlenmin );
nlen = AllocateIntVec( njob );
oripos = AllocateIntVec( njob );
readData_pointer_casepreserve( infp, name, nlen, seq );
+ fclose( infp );
dorp = NOTSPECIFIED;
getnumlen( alfp );
aname = AllocateCharMtx( njob, B+1 );
oname = AllocateCharMtx( njob, B+1 );
readData_pointer( alfp, aname, nlen, aseq );
+ fclose( alfp );
for( i=0; i<njob; i++ ) gappick_samestring( seq[i] );
+// reporterr( "seq[njob-1] = %s\n", seq[njob-1] );
+
+ if( keeplength )
+ {
+ gett = calloc( 1000, sizeof( char ) );
+// ndel = (int *)calloc( njob, sizeof( int ) );
+// for( i=0; i<njob; i++ ) ndel[i] = 0;
+// dlist = (int **)calloc( njob+1, sizeof( int * ) );
+// for( i=0; i<njob; i++ ) dlist[i] = NULL;
+// dlist[njob] = NULL;
+
+ while( 1 )
+ {
+ fgets( gett, 999, dlfp );
+ if( feof( dlfp ) ) break;
+ sscanf( gett, "%d %d", &i, &o );
+// reporterr( "%d, %d\n", i, o );
+// dlist[i] = realloc( dlist[i], sizeof( int ) * (ndel[i]+2) );
+// dlist[i][ndel[i]] = o;
+// ndel[i]++;
+
+ seq[i][o] = '-';
+
+ }
+ fclose( dlfp );
+ }
+
+ for( i=0; i<njob; i++ )
+ {
+ gappick_samestring( seq[i] );
+ }
+
if( njoba != njobs )
{
fprintf( stderr, "ERROR!!\n" );
fprintf( stderr, "npt2 = %s\n", npt2 );
o = oripos[i] = atoi( npt2 );
- npt = strstr( npt2, "_oripos_" );
+ npt = strstr( npt2, "_oe_" );
if( npt == NULL )
{
fprintf( stderr, "Format error!\n" );
exit( 1 );
}
- npt += 8;
+ npt += 4;
strcat( oname[i], npt+1 );
#endif
- npt = strstr( aname[i], "_oripos_" );
- if( npt == NULL )
+ npt0 = strstr( aname[i], "_os_" );
+ if( npt0 == NULL )
{
fprintf( stderr, "Format error!\n" );
exit( 1 );
}
- npt2 = npt - 2;
- while( isdigit( *npt2-- ) );
- npt2 += 2;
- npt += 8;
+ npt2 = npt0 + 4;
o = oripos[i] = atoi( npt2 );
-// fprintf( stderr, "npt2 = :%s:\n", npt2 );
-// fprintf( stderr, "npt = :%s:\n", npt );
-// fprintf( stderr, "aname[i] = :%s:\n", aname[i] );
-// fprintf( stderr, "npt2-aname[i] = :%d:\n", npt2-aname[i] );
- prelen = npt2-aname[i];
+ npt = strstr( aname[i], "_oe_" );
+ if( npt == NULL )
+ {
+ fprintf( stderr, "Format error!\n" );
+ exit( 1 );
+ }
+ npt += 4;
+
+ pt2 = npt;
+ pt = npt2 - 4;
+ while( *pt ) *pt++ = *pt2++; // okashii
+
+ prelen = npt0-aname[i];
strncpy( oname[i], aname[i], prelen ); oname[i][prelen] = 0;
- strcat( oname[i], npt+1 );
+ strcat( oname[i], npt0 );
+
+#if 0
+ pt = strstr( aname[i], "_numo_e" );
+ if( pt ) pt += 8;
+ else pt = aname[i] + 1;
+
+ if( strstr( pt, "_seed_" ) ) pt += 6;
+
+ fprintf( stderr, "pt = :%s:\n", pt );
+
+#endif
+// fprintf( stderr, "npt0 = :%s:\n", npt0 );
+//
+// reporterr( "oname[i] = %s\n", oname[i] );
+// reporterr( "aname[i] = %s\n", aname[i] );
+// reporterr( " name[i] = %s\n", name[i] );
+
+// fprintf( stderr, "aname[i] = :%s:\n", aname[i] );
+// fprintf( stderr, "pt = :%s:\n", pt );
+// fprintf( stderr, "oname[i] = :%s:\n", oname[i] );
+// fprintf( stderr, "name[o] = :%s:\n", name[o] );
- if( strncmp( npt, name[o], 10 ) )
+ if( strncmp( npt0, name[o]+1, 10 ) )
{
fprintf( stderr, "ERROR!!\n" );
fprintf( stderr, "In input file,\n" );
fprintf( stderr, "name[%d] = %s\n", o, name[o] );
fprintf( stderr, "but in alignment file,\n" );
+ fprintf( stderr, "oname[%d] = %s\n", i, oname[i] );
+ fprintf( stderr, "npt0 = %s\n", npt0 );
+ fprintf( stderr, "prelen = %d\n", prelen );
fprintf( stderr, "name[%d] = %s\n", i, aname[i] );
exit( 1 );
}
FreeCharMtx( aseq );
FreeCharMtx( name );
FreeCharMtx( aname );
+ FreeCharMtx( oname );
free( nlen );
+ free( oripos );
return( 0 );
}
-static float singleribosumscore( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2 )
+#if 0
+static double singleribosumscore( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2 )
{
- float val;
+ double val;
int i, j;
int code1, code2;
code2 = amino_n[(int)s2[j][p2]];
if( code2 > 3 ) code2 = 36;
-// fprintf( stderr, "'l'%c-%c: %f\n", s1[i][p1], s2[j][p2], (float)ribosumdis[code1][code2] );
+// fprintf( stderr, "'l'%c-%c: %f\n", s1[i][p1], s2[j][p2], (double)ribosumdis[code1][code2] );
- val += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j];
+ val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
}
return( val );
}
-static float pairedribosumscore53( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )
+static double pairedribosumscore53( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )
{
- float val;
+ double val;
int i, j;
int code1o, code1u, code2o, code2u, code1, code2;
else code2 = 4 + code2o * 4 + code2u;
-// fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (float)ribosumdis[code1][code2] );
+// fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (double)ribosumdis[code1][code2] );
if( code1 == 36 || code2 == 36 )
- val += (float)n_dis[code1o][code2o] * eff1[i] * eff2[j];
+ val += (double)n_dis[code1o][code2o] * eff1[i] * eff2[j];
else
- val += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j];
+ val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
}
return( val );
}
-static float pairedribosumscore35( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )
+static double pairedribosumscore35( int n1, int n2, char **s1, char **s2, double *eff1, double *eff2, int p1, int p2, int c1, int c2 )
{
- float val;
+ double val;
int i, j;
int code1o, code1u, code2o, code2u, code1, code2;
else code2 = 4 + code2u * 4 + code2o;
-// fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (float)ribosumdis[code1][code2] );
+// fprintf( stderr, "%c%c-%c%c: %f\n", s1[i][p1], s1[i][c1], s2[j][p2], s2[j][c2], (double)ribosumdis[code1][code2] );
if( code1 == 36 || code2 == 36 )
- val += (float)n_dis[code1o][code2o] * eff1[i] * eff2[j];
+ val += (double)n_dis[code1o][code2o] * eff1[i] * eff2[j];
else
- val += (float)ribosumdis[code1][code2] * eff1[i] * eff2[j];
+ val += (double)ribosumdis[code1][code2] * eff1[i] * eff2[j];
}
return( val );
}
+#endif
static void mccaskillextract( char **seq, char **nogap, int nseq, RNApair **pairprob, RNApair ***single, int **sgapmap, double *eff )
int nogaplgth;
int i, j;
int left, right, adpos;
- float prob;
+ double prob;
static TLS int *pairnum;
RNApair *pt, *pt2;
{
if( pairprob[i][j].bestpos > -1 )
{
-// pairprob[i][j].bestscore /= (float)nseq;
+// pairprob[i][j].bestscore /= (double)nseq;
// fprintf( stderr, "pair of %d = %d (%f) %c:%c\n", i, pairprob[i][j].bestpos, pairprob[i][j].bestscore, seq[0][i], seq[0][pairprob[i][j].bestpos] );
}
}
char gett[1000];
FILE *fp;
int left, right, dumm;
- float prob;
+ double prob;
static TLS int pid;
static TLS char fnamein[100];
static TLS char cmd[1000];
if( gett[0] == ',' ) break;
if( gett[0] != ' ' ) continue;
- sscanf( gett, "%d %d %d %f", &left, &right, &dumm, &prob );
+ sscanf( gett, "%d %d %d %lf", &left, &right, &dumm, &prob );
left--;
right--;
}
-void foldrna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***grouprna1, RNApair ***grouprna2, float **impmtx, int *gapmap1, int *gapmap2, RNApair *additionalpair )
+void foldrna( int nseq1, int nseq2, char **seq1, char **seq2, double *eff1, double *eff2, RNApair ***grouprna1, RNApair ***grouprna2, double **impmtx, int *gapmap1, int *gapmap2, RNApair *additionalpair )
{
int i, j;
// int ui, uj;
static TLS RNApair *pairpt1, *pairpt2;
int lgth1 = strlen( seq1[0] );
int lgth2 = strlen( seq2[0] );
- static TLS float **impmtx2;
- static TLS float **map;
+ static TLS double **impmtx2;
+ static TLS double **map;
// double lenfac;
- float prob;
+ double prob;
int **sgapmap1, **sgapmap2;
- char *nogapdum;
- float **tbppmtx;
+// char *nogapdum;
+ double **tbppmtx;
// fprintf( stderr, "nseq1=%d, lgth1=%d\n", nseq1, lgth1 );
odir2 = AllocateCharVec( lgth2+10 );
sgapmap1 = AllocateIntMtx( nseq1, lgth1+1 );
sgapmap2 = AllocateIntMtx( nseq2, lgth2+1 );
- nogapdum = AllocateCharVec( MAX( lgth1, lgth2 ) );
+// nogapdum = AllocateCharVec( MAX( lgth1, lgth2 ) );
pairprob1 = (RNApair **)calloc( lgth1, sizeof( RNApair *) );
pairprob2 = (RNApair **)calloc( lgth2, sizeof( RNApair *) );
map = AllocateFloatMtx( lgth1, lgth2 );
}
else if( RNAscoremtx == 'r' )
{
- for( i=0; i<lgth1; i++ ) for( j=0; j<lgth2; j++ )
- {
- tbppmtx[i][j] = 1.0;
- impmtx2[i][j] = 0.0;
- }
- for( i=0; i<lgth1; i++ ) for( pairpt1=pairprob1[i]; pairpt1->bestpos!=-1; pairpt1++ )
- {
- for( j=0; j<lgth2; j++ ) for( pairpt2=pairprob2[j]; pairpt2->bestpos!=-1; pairpt2++ )
- {
- uido = pairpt1->bestpos;
- ujdo = pairpt2->bestpos;
- prob = pairpt1->bestscore * pairpt2->bestscore;
- if( uido > -1 && ujdo > -1 )
- {
- if( uido > i && j > ujdo )
- {
- impmtx2[i][j] += prob * pairedribosumscore53( nseq1, nseq2, oseq1, oseq2, eff1, eff2, i, j, uido, ujdo ) * consweight_multi;
- tbppmtx[i][j] -= prob;
- }
- else if( i < uido && j < ujdo )
- {
- impmtx2[i][j] += prob * pairedribosumscore35( nseq1, nseq2, oseq1, oseq2, eff1, eff2, i, j, uido, ujdo ) * consweight_multi;
- tbppmtx[i][j] -= prob;
- }
- }
- }
- }
-
-
- for( i=0; i<lgth1; i++ )
- {
- for( j=0; j<lgth2; j++ )
- {
- impmtx2[i][j] += tbppmtx[i][j] * singleribosumscore( nseq1, nseq2, oseq1, oseq2, eff1, eff2, i, j ) * consweight_multi;
- }
- }
+ fprintf( stderr, "Unexpected error. Please contact kazutaka.katoh@aist.go.jp\n" );
}
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
+ kimuraR = myatoi( *++argv );
// fprintf( stderr, "kimuraR = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
fprintf( stderr, "blosum %d\n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = JTT;
fprintf( stderr, "jtt %d\n", pamN );
--argc;
goto nextoption;
case 'm':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = TM;
fprintf( stderr, "TM %d\n", pamN );
break;
/* modification end. */
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
--argc;
goto nextoption;
case 'Z':
int alloclen;
RNApair **pair1;
RNApair **pair2;
- float **map;
+ double **map;
arguments( argc, argv );
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
+ kimuraR = myatoi( *++argv );
fprintf( stderr, "kimuraR = %d\n", kimuraR );
--argc;
goto nextoption;
int main( int ac, char **av )
{
- int nlen[M];
- static char name[M][B], **seq;
+ int *nlen;
+ static char **name, **seq;
double score;
extern double score_calc_for_score( int, char ** );
getnumlen( stdin );
rewind( stdin );
- seq = AllocateCharMtx( njob, nlenmax );
+ nlen = AllocateIntVec( njob );
+ name = AllocateCharMtx( njob, B+1 );
+ seq = AllocateCharMtx( njob, nlenmax+2 );
- readData( stdin, name, nlen, seq );
+ readData_pointer( stdin, name, nlen, seq );
if( !isaligned( njob, seq ) ) ErrorExit( "Not aligned." );
--- /dev/null
+#!/usr/bin/perl
+
+####################################################################################
+# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
+#
+# Ver. Date Changelog
+####################################################################################
+# 1.0 11.01.13 Initial release
+#
+# **Skipped version 2 to standardise version numbers to seekquencer.pl script**
+#
+# 3.0 04.24.14 Added split option -mod <mafftash-split> for output
+# Uses seekquencer_v3 backend
+#
+# 4.0 05.12.14 Added new options: -run <thread|normal> -trd <count> -noin
+# Sets -seqa fast in seekquencer.pl
+# Uses seekquencer_v4 backend
+#
+# 4.1 05.19.14 Added a check on running REST requests before proceeding
+# to avoid server load problems
+#
+# 4.2 05.27.14 Seq limit processing done in seekquencer.pl script
+# to avoid server load problems
+#
+# 4.3 07.22.14 Added new option: -seqd <uniref100|uniref90|uniref70|uniprot>
+# Blast limit changed from factor of 10 to -blim option
+# Timing on sleep changed; added srand() for making seed
+# Moved the job limit processing to server side
+#
+# 4.4 08.05.14 Modified to work in multiple OS
+#
+#
+####################################################################################
+
+use strict;
+use Getopt::Long;
+use File::Path qw(make_path remove_tree);
+use Cwd;
+use LWP::Simple;
+use LWP::UserAgent;
+
+# to prevent error: Header line too long (limit is 8192)
+use LWP::Protocol::http;
+push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
+
+
+
+# REST endpoint of the Seekquencer service and the command-line state.
+# The values assigned here are the defaults used when an option is not given.
+my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft";
+my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG );
+my $OUTTYPE = "mafftash";
+
+my $SEQDATABASE = "uniref100";
+my $SEQLIMIT = 100;
+my $SEQBLASTLIMIT = 100;
+
+my $RUNMODE = "normal"; # thread|normal
+my $THREADCOUNT = 3;
+
+
+# GetOptions returns false when it meets an unknown option or a value of the
+# wrong type. Stop with the usage text in that case instead of silently
+# continuing with the defaults above.
+GetOptions
+(
+    'inp=s'  => \$INPUTFILE,
+    'idf=s'  => \$IDLISTFILE,
+    'seqf=s' => \$SEQFASTAFILE,
+    'out=s'  => \$OUTPUTFILE,
+    'str'    => \$STRFLAG,
+    'seq'    => \$SEQFLAG,
+    'seqd=s' => \$SEQDATABASE,
+    'lim=i'  => \$SEQLIMIT,
+    'blim=i' => \$SEQBLASTLIMIT,
+    'pre'    => \$EVALFLAG,
+    'noin'   => \$NOINFLAG,
+    'mod=s'  => \$OUTTYPE,
+    'run=s'  => \$RUNMODE,
+    'trd=i'  => \$THREADCOUNT,
+) or help("Invalid command line option(s)");
+
+# the checksum comparison of downloaded files is skipped on Windows
+my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0;
+print STDERR "[Seekquencer-premafft 4.4 on $^O]\n";
+
+
+# set temp directory: a per-process directory under the current working directory
+my $CWD = getcwd;
+my $TMP = "$CWD/seekpremafft$$";
+make_path($TMP) unless -d $TMP;
+
+
+
+######
+# validation
+# Each failed check calls help(), which prints the usage text and exits.
+
+# input selection: -inp on its own, or -idf and/or -seqf
+unless ( defined $INPUTFILE || defined $IDLISTFILE || defined $SEQFASTAFILE )
+{
+    help("Required parameter: define input as '-inp' or '-idf' or '-seqf'");
+}
+if ( defined $INPUTFILE && ( defined $IDLISTFILE || defined $SEQFASTAFILE ) )
+{
+    help("'-inp' is already defined");
+}
+
+# every input file that was named must exist and be non-empty
+if ( defined $INPUTFILE && !( -e $INPUTFILE && -s $INPUTFILE ) )
+{
+    help("Input file $INPUTFILE does not exist (or filesize is 0)");
+}
+if ( defined $IDLISTFILE && !( -e $IDLISTFILE && -s $IDLISTFILE ) )
+{
+    help("Input file $IDLISTFILE does not exist (or filesize is 0)");
+}
+if ( defined $SEQFASTAFILE && !( -e $SEQFASTAFILE && -s $SEQFASTAFILE ) )
+{
+    help("Input file $SEQFASTAFILE does not exist (or filesize is 0)");
+}
+
+# output file is mandatory; -str and -seq exclude each other
+help("Required parameter: output file '-out'") if !defined $OUTPUTFILE;
+if ( defined $STRFLAG && defined $SEQFLAG )
+{
+    help("Set either '-str' or '-seq' or dont set any at all");
+}
+
+# enumerated option values
+unless ( grep { $SEQDATABASE eq $_ } ( "uniref100", "uniref90", "uniref70", "uniprot" ) )
+{
+    help("Invalid value for '-seqd <uniref100|uniref90|uniref70|uniprot>'");
+}
+unless ( grep { $OUTTYPE eq $_ } ( "fasta", "mafftash", "mafftash-split" ) )
+{
+    help("Invalid value for '-mod <fasta|mafftash|mafftash-split>'");
+}
+unless ( $RUNMODE eq "thread" || $RUNMODE eq "normal" )
+{
+    help("Invalid value for '-run <thread|normal>'");
+}
+
+# the thread count is only checked when threaded mode was requested
+if ( $RUNMODE eq "thread" && !( $THREADCOUNT > 0 && $THREADCOUNT <= 5 ) )
+{
+    help("Invalid value for '-trd <count>'; count should be between 1 and 5 (inclusive)");
+}
+
+
+######
+# check existing requests
+print STDERR "Checking server status...\n";
+
+# seed the random generator from the process id, then
+# sleep 1 to 6 seconds to give time for lsf response
+srand($$);
+sleep( 1 + int( rand(6) ) );
+
+
+my $browser = LWP::UserAgent->new;
+$browser->timeout(0);
+
+# get: check if you can send a new request this time
+my $jobsResponse = $browser->get("$BASEURL/isAllowed");
+
+# failed request: report the HTTP code with the server's error text and stop
+unless ( $jobsResponse->is_success )
+{
+    bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content)));
+}
+
+# a reply that is not greater than zero means the request is refused for now
+{
+    my $allowance = parseJobQueryResponse($jobsResponse->content);
+    if ( !( $allowance > 0 ) )
+    {
+        bail("Max jobs reached. The server cannot process your request right now; try again later.", 0);
+    }
+}
+
+
+######
+# make a temporary input if lists were provided
+# When -inp was not given, build "$TMP/input.homemade" from the id list
+# (-idf) and/or the FASTA file (-seqf) and use it as the input file.
+unless ( defined $INPUTFILE )
+{
+    $INPUTFILE = "$TMP/input.homemade";
+
+    # three-argument open with lexical handles: user-supplied file names are
+    # never parsed for mode characters, and no global handle is shared with
+    # the subs further down
+    open(my $inpfh, '>', $INPUTFILE) or bail("Error writing to input file.");
+
+    if ( defined $IDLISTFILE )
+    {
+        open(my $idfh, '<', $IDLISTFILE) or bail("Error reading input file.");
+        while ( my $line = <$idfh> )
+        {
+            # the first run of five word characters on a line is written as one PDBID record
+            print {$inpfh} ">PDBID\n$1\n" if $line =~ /(\w{5})/;
+        }
+        close $idfh;
+    }
+
+    if ( defined $SEQFASTAFILE )
+    {
+        open(my $fastafh, '<', $SEQFASTAFILE) or bail("Error reading input file.");
+        while ( my $line = <$fastafh> )
+        {
+            # chomp + "\n" guarantees a newline even after an unterminated last line
+            chomp $line;
+            print {$inpfh} "$line\n";
+        }
+        close $fastafh;
+    }
+
+    # buffered write errors (e.g. disk full) only surface at close
+    close $inpfh or bail("Error writing to input file.");
+}
+
+
+######
+# prepare parameters
+print STDERR "Preparing parameters for service request...\n";
+
+# -str alone collects structures, -seq alone collects sequences,
+# neither flag collects both (structure fields first, then sequence fields)
+my $collect_structures = ( defined $STRFLAG || !defined $SEQFLAG ) ? 1 : 0;
+my $collect_sequences  = ( defined $STRFLAG ) ? 0 : 1;
+
+# form fields, kept as an ordered key/value list
+my @parameters =
+(
+    "fileinput" => ["$INPUTFILE"],
+    "out_type"  => $OUTTYPE,
+    "rest_flag" => "1",
+    "cls_flag"  => "1",
+);
+
+if ( defined $EVALFLAG )
+{
+    push(@parameters, "pre_flag" => "1");
+}
+if ( defined $NOINFLAG )
+{
+    push(@parameters, "noin_flag" => "1");
+}
+
+push(@parameters, "run_mode" => $RUNMODE);
+if ( $RUNMODE eq "thread" )
+{
+    push(@parameters, "thread_count" => $THREADCOUNT);
+}
+
+if ( $collect_structures )
+{
+    push(@parameters, "str_flag" => "1", "ash_flag" => "1");
+}
+
+if ( $collect_sequences )
+{
+    push(@parameters,
+        "seq_flag"        => "1",
+        "seq_algorithm"   => "fast",
+        "seq_database"    => $SEQDATABASE,
+        "seq_blastlimit"  => $SEQBLASTLIMIT,
+        "seq_outputlimit" => $SEQLIMIT,
+    );
+}
+
+
+
+######
+# start rest service
+print STDERR "Sending service request...\n";
+
+# post: running a mafftash job (multipart form upload of the input file)
+my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' );
+bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);
+
+
+# get response from post request
+my ($status, $seekid) = parseResponse($postResponse->content);
+
+# parseResponse() hands back an empty id when the body is not "<id>:<status>".
+# Without an id the polling below would only ever ask for "$BASEURL/", so stop here.
+bail("Error: unexpected response from server.") if $seekid eq "";
+
+my $MAXTRIES = 3;   # attempts per download before giving up
+my $STIMER = 5;     # seconds: polling increment and pause between download retries
+my $timer = 0;      # current polling delay, grown inside the wait loop
+
+print STDERR "Request sent! Waiting for response...[$seekid]\n";
+
+# file name => checksum, filled in once the job has finished
+my $checklist = {};
+
+# wait for results until it becomes available
+# Outer loop: poll the job status with a growing delay. Once the status is
+# "done", download the checksum list and then every file named in it, and
+# leave the loop. Any failed status request ends the script through bail().
+while(1)
+{
+ # sleeps for 5+random, 10+random, 15+random, 20+random, 25+random, 30+random ,,, 60+random, 60+random,,,
+ # (random is a whole number of seconds from 0 to 3)
+ $timer = $timer >= 60 ? 60 : $timer+$STIMER;
+ sleep($timer+int(rand(4)));
+
+ # get: get results for mafftash job
+ my $getResponse = $browser->get("$BASEURL/$seekid");
+
+ if ( $getResponse->is_success )
+ {
+
+ # get response from get request
+ # NOTE(review): both $status and $seekid are overwritten here; parseResponse()
+ # returns empty strings for a body it cannot parse, so a malformed reply
+ # also blanks the job id used by the next poll - confirm this is intended
+ ($status, $seekid) = parseResponse($getResponse->content);
+ next unless ( $status eq "done" );
+
+
+ # if job is finished and ready
+ print STDERR "Results found!\n";
+ my $csfile = "$TMP/checksum";
+ my $try1 = 1;
+
+
+ # retry loop for the checksum list: up to $MAXTRIES attempts, $STIMER seconds apart
+ while(1)
+ {
+ print STDERR "Fetching Results... [Trial $try1]\n";
+
+ # the download counts only if the HTTP status is a success and the stored file is non-empty
+ if ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile )
+ {
+ # get response from get request
+ # $checklist maps each file name to the checksum listed for it
+ $checklist = extractchecksum($csfile);
+ bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 );
+
+
+ # fetch every listed file into $TMP, pausing one second before each
+ foreach my $id ( sort keys %$checklist )
+ {
+ sleep 1;
+ my $checkfile = "$TMP/$id";
+ my $checkid = $checklist->{$id};
+ my $try2 = 1;
+
+ # retry loop for one file: up to $MAXTRIES attempts, $STIMER seconds apart
+ while(1)
+ {
+ # drop any partial copy left by an earlier attempt
+ unlink $checkfile if -e $checkfile;
+
+ if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
+ {
+ # no checksum comparison on Windows: the file is accepted as downloaded
+ last if $ISWINDOWS;
+
+ my $hashid = getchecksum($checkfile);
+ #print STDERR "[hashid]$hashid [checkid]$checkid\n";
+
+ # an empty $hashid is accepted without comparison; only a non-empty
+ # value that differs from the listed checksum triggers a retry
+ if ($hashid ne "" && $hashid ne $checkid )
+ {
+ #unlink $checkfile if -e $checkfile;
+ bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ else
+ {
+ last;
+ }
+ }
+ else
+ {
+ bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ }
+ }
+
+ # all listed files are in place
+ last;
+ }
+ else
+ {
+ bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
+ $try1++;
+ sleep $STIMER;
+ }
+ }
+
+ # results downloaded: stop polling
+ last;
+
+ }
+ else
+ {
+ # failed status request: report the HTTP code with the server's error text and stop
+ bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content)));
+ }
+
+}
+
+
+# make sure outputs were generated, then copy them to the user's output file(s)
+# Downloaded names of the form "<seekid>.out", "<seekid>.out.str" or
+# "<seekid>.out.seq" are appended, unchanged, to OUTPUTFILE, OUTPUTFILE.str
+# or OUTPUTFILE.seq respectively. Other names in the checklist are ignored.
+print STDERR "Assembling final results...\n";
+
+foreach my $id ( sort keys %$checklist )
+{
+    # \Q...\E: the job id comes from the server and must match literally,
+    # not be interpreted as a pattern
+    next unless $id =~ /^\Q$seekid\E\.out(\.str|\.seq)?/;
+
+    # the optional group is undef for a plain "<seekid>.out"
+    my $suffix = defined $1 ? $1 : "";
+
+    bail("Error: Output file corrupted!") unless -e "$TMP/$id";
+    appendToFile("$TMP/$id", $OUTPUTFILE.$suffix);
+}
+
+cleanup();
+
+
+
+####################
+####################
+
+
+# Split a "<seekid>:<status>" reply into its two fields.
+# Neither field may contain whitespace or a colon.
+# Returns the list (status, seekid); both are "" when the reply has another shape.
+sub parseResponse
+{
+    my ($reply) = @_;
+
+    my ($id, $state) = ( $reply =~ /^([^\s:]+):([^\s:]+)$/ );
+
+    return ("", "") unless defined $id;
+    return ($state, $id);
+}
+
+
+# Read the reply of the "isAllowed" query.
+# Returns the reply itself when it consists of digits only, otherwise 100.
+sub parseJobQueryResponse
+{
+    my ($reply) = @_;
+
+    return ( $reply =~ /^(\d+)$/ ) ? $1 : 100;
+}
+
+
+# Read a checksum list with one "<checksum> <filename>" pair per line.
+#   $infile - path of the list file
+# Returns a hash reference mapping filename => checksum. Lines that are not
+# exactly two whitespace-separated fields are ignored; the hash is empty
+# when the file cannot be opened.
+sub extractchecksum
+{
+    my $infile = shift;
+    my %dataset = ();
+
+    # three-argument open on a lexical handle: the path is never parsed for
+    # mode characters and no global handle is left behind
+    open(my $csum, '<', $infile) or return \%dataset;
+
+    while ( my $line = <$csum> )
+    {
+        chomp $line;
+        if ( $line =~ /^(\S+)\s+(\S+)$/ )
+        {
+            $dataset{$2} = $1;
+        }
+    }
+
+    close $csum;
+
+    return \%dataset;
+}
+
+
+# Pull the message out of an error reply such as
+#   "error":"Invalid number of inputs found."
+# Returns the quoted message, or the whole reply when no such field is found.
+sub parseError
+{
+    my ($reply) = @_;
+
+    if ( $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
+    {
+        return $1;
+    }
+
+    return $reply;
+}
+
+
+# Compute the MD5 checksum of a file with the system's md5 tool.
+#   $infile - path of the file to hash
+# Returns the 32-digit hexadecimal checksum, or "" when neither
+# /usr/bin/md5sum nor /sbin/md5 is executable, the tool cannot be started,
+# or its output holds no checksum.
+sub getchecksum
+{
+    my $infile = shift;
+
+    # md5 binary check
+    my @md5cmd = ();
+
+    if ( -x "/usr/bin/md5sum" )
+    {
+        @md5cmd = ( "/usr/bin/md5sum" );
+    }
+    elsif ( -x "/sbin/md5" )
+    {
+        @md5cmd = ( "/sbin/md5", "-q" );
+    }
+
+    return "" unless @md5cmd;
+
+
+    # List-form pipe open: the file name is handed to the tool as a single
+    # argument and never passes through a shell, so spaces or shell
+    # metacharacters in the path cannot break or alter the command.
+    open(my $md5fh, '-|', @md5cmd, $infile) or return "";
+
+    my $checksum = "";
+
+    while ( my $line = <$md5fh> )
+    {
+        # Take the digest itself rather than splitting the line into fields:
+        # md5sum prints "<digest>  <name>" and the name may contain spaces
+        # (md5sum also prefixes the line with "\" when it escapes the name).
+        if ( $line =~ /^\\?([0-9a-fA-F]{32})\b/ )
+        {
+            $checksum = $1;
+            last;
+        }
+    }
+
+    close $md5fh;
+
+    return $checksum;
+
+}
+
+
+# Run a command through backticks and discard whatever it prints.
+# Returns 0 when $? is -1 after the call, 1 otherwise
+# (the command's own exit code is not examined).
+sub backticks
+{
+    my ($cmdline) = @_;
+
+    my $discarded = `$cmdline`;
+
+    return 0 if $? == -1;
+    return 1;
+}
+
+
+# Print an optional message to STDERR, remove the temporary directory and exit.
+#   $str    - message to print; nothing is printed when it is undef
+#   $status - exit code: 0 for success, 1 for error (default 1)
+# Does not return.
+sub bail
+{
+    my ($str, $status) = @_;
+
+    # Test $status itself. A bare "defined" examines $_ instead, which
+    # replaced an explicitly passed 0 with 1 whenever $_ was undef and left
+    # a missing status undefined whenever $_ happened to be set.
+    $status = 1 unless defined $status;
+
+    print STDERR "$str\n" if defined $str;
+
+    cleanup();
+
+    exit($status);
+}
+
+
+# Delete the temporary working directory $TMP together with its contents.
+# Does nothing when $TMP is unset, empty, or not an existing directory.
+sub cleanup
+{
+    # $TMP can still be undef when this runs before the directory was set up
+    return if ( !defined $TMP || $TMP eq "" || !-d $TMP );
+
+    # lexical directory handle, and only read it when it really opened
+    if ( opendir(my $maindir, $TMP) )
+    {
+        # "." and ".." are directory entries, not files to unlink
+        my @files = grep { $_ ne "." && $_ ne ".." } readdir($maindir);
+        closedir($maindir);
+
+        foreach my $file (@files)
+        {
+            unlink "$TMP/$file" if -e "$TMP/$file";
+        }
+    }
+
+    # removes the directory itself and anything the loop above left behind
+    remove_tree($TMP);
+
+}
+
+
+# Append the whole content of one file to the end of another.
+#   $inpfile - file to read
+#   $outfile - file to append to (created when it does not exist)
+# Calls bail(), which exits, when either file cannot be opened or the
+# appended data cannot be flushed.
+sub appendToFile
+{
+    my ($inpfile, $outfile) = @_;
+
+    # Three-argument open with lexical handles: the names are used exactly as
+    # given (no mode characters or surrounding blanks are interpreted), and
+    # the handles cannot collide with handles opened elsewhere in the script.
+    open(my $in, '<', $inpfile) or bail("Server Error: Error in reading file.");
+    open(my $out, '>>', $outfile) or bail("Server Error: Error in writing to file.");
+
+    while ( my $line = <$in> )
+    {
+        print {$out} $line;
+    }
+
+    # buffered write errors (e.g. disk full) only surface at close
+    close($out) or bail("Server Error: Error in writing to file.");
+    close($in);
+}
+
+
+
+# Print the usage text to the currently selected output handle, then pass
+# $str to bail(), which prints it to STDERR, cleans up and exits.
+#   $str - message naming the problem that led here
+# Does not return.
+sub help
+{
+ my $str = shift;
+
+ # single-quoted heredoc: the text below is printed literally, nothing is interpolated
+ print <<'HELPME';
+
+USAGE
+ ./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]
+ ./seekquencer_premafft.pl -idf <LISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]
+
+
+PARAMETERS
+ -inp <INFILE>
+ INFILE is a FASTA-formatted file
+ PDB entries are written as:
+ >PDBID
+ [5-character pdbid+chain]
+
+ While sequence entries are written as:
+ >[id]
+ [sequence]
+
+ -idf <LISTFILE>
+ IDLISTFILE is a file containing a list of pdbids
+ pdbids should be a 5-character pdbid + chain
+
+ -seqf <SEQFASTA>
+ SEQFASTA is a fasta file
+ entries are written as:
+ >[id]
+ [sequence]
+
+ -out <OUTFILE>
+ Results are writen to a file named OUTFILE
+
+ -str
+ Only structures will be collected by Seekquencer
+ If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
+
+ -seq
+ Only sequences will be collected by Seekquencer
+ If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
+
+
+OPTIONAL PARAMETERS:
+ -seqd <uniref100|uniref90|uniref70|uniprot>
+ Search Database for sequence homologs. Default value: uniref100
+
+ -lim <count>
+ this sets the maximum number of sequence homologs collected. Default value: 100
+
+ -blim <count>
+ this sets the -b and -v value when running blastall. Default value: 100
+
+ -pre
+ When -str is set, this will compare all structures against all using pdp-ash
+ This would ensure that all structures collected are matching
+ All structures that do not match will be removed
+
+ -noin
+ When set, inputs will not be included in the output
+
+ -mod <mafftash|mafftash-split|fasta>
+ Defines the output format
+ mafftash (default) will print a mafftash-formatted fasta file
+ mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq)
+ fasta will print a regular fasta file
+
+ -run <thread|normal>
+ thread will run simultaneous jobs during blast queries (faster but takes more nodes)
+ normal will run sequential blast queries (slower but takes less nodes)
+ Default value: normal
+
+ -trd <count>
+ if -run <thread> is defined, this sets the number of parallel jobs to run. Default value: 3
+
+
+HELPME
+
+ # no exit status is passed, so bail() falls back to its own default
+ bail($str);
+}
+
--- /dev/null
+#!/usr/bin/perl
+
+####################################################################################
+# Author: KM Amada (kmamada@ifrec.osaka-u.ac.jp)
+#
+# Ver. Date Changelog
+####################################################################################
+# 1.0 11.01.13 Initial release
+#
+# **Skipped version 2 to standardise version numbers to seekquencer.pl script**
+#
+# 3.0 04.24.14 Added split option -mod <mafftash-split> for output
+# Uses seekquencer_v3 backend
+#
+# 4.0 05.12.14 Added new options: -run <thread|normal> -trd <count> -noin
+# Sets -seqa fast in seekquencer.pl
+# Uses seekquencer_v4 backend
+#
+# 4.1 05.19.14 Added a check on running REST requests before proceeding
+# to avoid server load problems
+#
+# 4.2 05.27.14 Seq limit processing done in seekquencer.pl script
+# to avoid server load problems
+#
+# 4.3 07.22.14 Added new option: -seqd <uniref100|uniref90|uniref70|uniprot>
+# Blast limit changed from factor of 10 to -blim option
+# Timing on sleep changed; added srand() for making seed
+# Moved the job limit processing to server side
+#
+# 4.4 08.05.14 Modified to work in multiple OS
+#
+#
+####################################################################################
+
+use strict;
+use Getopt::Long;
+use File::Path qw(make_path remove_tree);
+use Cwd;
+use LWP::Simple;
+use LWP::UserAgent;
+
+# to prevent error: Header line too long (limit is 8192)
+use LWP::Protocol::http;
+push(@LWP::Protocol::http::EXTRA_SOCK_OPTS, MaxLineLength => 0);
+
+
+
+my $BASEURL = "http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/REST/service.cgi/premafft";
+my ( $INPUTFILE, $IDLISTFILE, $SEQFASTAFILE, $OUTPUTFILE, $SEQFLAG, $STRFLAG, $EVALFLAG, $NOINFLAG );
+my $OUTTYPE = "mafftash";
+
+my $SEQDATABASE = "uniref100";
+my $SEQLIMIT = 100;
+my $SEQBLASTLIMIT = 100;
+
+my $RUNMODE = "normal"; # thread|normal
+my $THREADCOUNT = 3;
+
+
+GetOptions
+(
+ 'inp=s' => \$INPUTFILE,
+ 'idf=s' => \$IDLISTFILE,
+ 'seqf=s' => \$SEQFASTAFILE,
+ 'out=s' => \$OUTPUTFILE,
+ 'str' => \$STRFLAG,
+ 'seq' => \$SEQFLAG,
+ 'seqd=s' => \$SEQDATABASE,
+ 'lim=i' => \$SEQLIMIT,
+ 'blim=i' => \$SEQBLASTLIMIT,
+ 'pre' => \$EVALFLAG,
+ 'noin' => \$NOINFLAG,
+ 'mod=s' => \$OUTTYPE,
+ 'run=s' => \$RUNMODE,
+ 'trd=i' => \$THREADCOUNT,
+
+
+);
+
+my $ISWINDOWS = ( $^O =~ /^MSWin/ ) ? 1 : 0;
+print STDERR "[Seekquencer-premafft 4.4 on $^O]\n";
+
+
+# set temp directory
+my $CWD = getcwd;
+my $TMP = "$CWD/seekpremafft$$";
+make_path($TMP) unless -d $TMP;
+
+
+
+######
+# validation
+help("Required parameter: define input as '-inp' or '-idf' or '-seqf'") if ( !defined $INPUTFILE && !defined $IDLISTFILE && !defined $SEQFASTAFILE );
+help("'-inp' is already defined") if ( defined $INPUTFILE && (defined $IDLISTFILE || defined $SEQFASTAFILE) );
+help("Input file $INPUTFILE does not exist (or filesize is 0)") if ( defined $INPUTFILE && (! -e $INPUTFILE || !-s $INPUTFILE) );
+help("Input file $IDLISTFILE does not exist (or filesize is 0)") if ( defined $IDLISTFILE && (! -e $IDLISTFILE || !-s $IDLISTFILE) );
+help("Input file $SEQFASTAFILE does not exist (or filesize is 0)") if ( defined $SEQFASTAFILE && (! -e $SEQFASTAFILE || !-s $SEQFASTAFILE) );
+help("Required parameter: output file '-out'") unless ( defined $OUTPUTFILE );
+help("Set either '-str' or '-seq' or dont set any at all") if ( defined $STRFLAG && defined $SEQFLAG );
+
+help("Invalid value for '-seqd <uniref100|uniref90|uniref70|uniprot>'") if ( $SEQDATABASE ne "uniref100" && $SEQDATABASE ne "uniref90" && $SEQDATABASE ne "uniref70" && $SEQDATABASE ne "uniprot");
+help("Invalid value for '-mod <fasta|mafftash|mafftash-split>'") if ( $OUTTYPE ne "fasta" && $OUTTYPE ne "mafftash" && $OUTTYPE ne "mafftash-split" );
+help("Invalid value for '-run <thread|normal>'") if ( $RUNMODE ne "thread" && $RUNMODE ne "normal" );
+help("Invalid value for '-trd <count>'; count should be between 1 and 5 (inclusive)") if ( $RUNMODE eq "thread" && ($THREADCOUNT <= 0 || $THREADCOUNT > 5) );
+
+
+######
+# check existing requests
+print STDERR "Checking server status...\n";
+
+# generate seed
+srand($$);
+
+# sleep a bit to give time for lsf response
+sleep(int(rand(6))+1);
+
+
+my $browser = LWP::UserAgent->new;
+$browser->timeout(0);
+
+# get: check if you can send a new request this time
+my $jobsResponse = $browser->get("$BASEURL/isAllowed");
+
+if ( $jobsResponse->is_success )
+{
+ my $status = parseJobQueryResponse($jobsResponse->content);
+ bail("Max jobs reached. The server cannot process your request right now; try again later.", 0) unless $status > 0;
+}
+else
+{
+ bail(sprintf("[%d] %s\n", $jobsResponse->code, parseError($jobsResponse->content)));
+}
+
+
+######
+# make a temporary input if lists were provided
+unless ( defined $INPUTFILE )
+{
+ $INPUTFILE = "$TMP/input.homemade";
+ open INPF, ">$INPUTFILE" or bail("Error writing to input file.");
+
+ if ( defined $IDLISTFILE )
+ {
+ open IDLIST, "<$IDLISTFILE" or bail("Error reading input file.");
+ while( <IDLIST> )
+ {
+ chomp;
+ if ( /(\w{5})/ )
+ {
+ print INPF ">PDBID\n$1\n";
+ }
+ }
+ close IDLIST;
+ }
+
+
+ if ( defined $SEQFASTAFILE )
+ {
+ open FASTA, "<$SEQFASTAFILE" or bail("Error reading input file.");
+ while( <FASTA> )
+ {
+ chomp;
+ print INPF "$_\n";
+ }
+ close FASTA;
+ }
+
+ close INPF;
+}
+
+
+######
+# prepare parameters
+print STDERR "Preparing parameters for service request...\n";
+
+# -str selects structures only (it wins over -seq), -seq selects sequences
+# only, and when neither is given both are collected
+my $collectstr = ( defined $STRFLAG || !defined $SEQFLAG ) ? 1 : 0;
+my $collectseq = ( defined $STRFLAG ) ? 0 : 1;
+
+my @parameters =
+(
+    "fileinput" => ["$INPUTFILE"],
+    "out_type"  => $OUTTYPE,
+    "rest_flag" => "1",
+    "cls_flag"  => "1",
+);
+
+push(@parameters, "pre_flag" => "1") if defined $EVALFLAG;
+push(@parameters, "noin_flag" => "1") if defined $NOINFLAG;
+
+push(@parameters, "run_mode" => $RUNMODE);
+push(@parameters, "thread_count" => $THREADCOUNT) if $RUNMODE eq "thread";
+
+# structure settings come first, sequence settings second
+if ( $collectstr )
+{
+    push(@parameters,
+        "str_flag" => "1",
+        "ash_flag" => "1",
+    );
+}
+
+if ( $collectseq )
+{
+    push(@parameters,
+        "seq_flag"        => "1",
+        "seq_algorithm"   => "fast",
+        "seq_database"    => $SEQDATABASE,
+        "seq_blastlimit"  => $SEQBLASTLIMIT,
+        "seq_outputlimit" => $SEQLIMIT,
+    );
+}
+
+
+
+######
+# start rest service
+#
+# Submits the job, polls the server until the job reports "done", then
+# downloads the checksum list and every file named in it into $TMP.
+print STDERR "Sending service request...\n";
+
+# post: running a mafftash job
+# (@parameters is sent to $BASEURL with content type 'form-data')
+my $postResponse = $browser->post( $BASEURL, \@parameters, 'Content_Type' => 'form-data' );
+bail(sprintf("[%d] %s\n", $postResponse->code, parseError($postResponse->content))) unless($postResponse->is_success);
+
+
+# get response from post request
+# parseResponse returns (status, seekid); $seekid is used in every later request URL
+my ($status, $seekid) = parseResponse($postResponse->content);
+
+# $MAXTRIES : maximum number of download attempts (checksum list and each file)
+# $STIMER   : seconds added to the polling delay per round, also the pause between download retries
+# $timer    : current base polling delay in seconds
+my $MAXTRIES = 3;
+my $STIMER = 5;
+my $timer = 0;
+
+print STDERR "Request sent! Waiting for response...[$seekid]\n";
+
+# file name => checksum, filled from the downloaded checksum list
+my $checklist = {};
+
+# wait for results until it becomes available
+# NOTE(review): the loop only ends when the status is "done" or a request
+# fails; any other status keeps polling without an upper bound - confirm
+# that the server never reports a terminal status other than "done"
+while(1)
+{
+ # sleeps for 5+random, 10+random, 15+random, 20+random, 25+random, 30+random ,,, 60+random, 60+random,,,
+ $timer = $timer >= 60 ? 60 : $timer+$STIMER;
+ sleep($timer+int(rand(4)));
+
+ # get: get results for mafftash job
+ my $getResponse = $browser->get("$BASEURL/$seekid");
+
+ if ( $getResponse->is_success )
+ {
+
+ # get response from get request
+ # ($seekid is overwritten with the id parsed from this reply)
+ ($status, $seekid) = parseResponse($getResponse->content);
+ next unless ( $status eq "done" );
+
+
+ # if job is finished and ready
+ print STDERR "Results found!\n";
+ my $csfile = "$TMP/checksum";
+ my $try1 = 1;
+
+
+ # download the checksum list, at most $MAXTRIES attempts
+ while(1)
+ {
+ print STDERR "Fetching Results... [Trial $try1]\n";
+
+ # the download counts only if the request succeeded and the file is non-empty
+ if ( is_success(getstore("$BASEURL/get/$seekid/checksum", $csfile)) && -e $csfile && -s $csfile )
+ {
+ # parse the downloaded checksum list (file name => checksum)
+ $checklist = extractchecksum($csfile);
+ bail("Error retrieving list of compressed files!") unless ( scalar %$checklist > 0 );
+
+
+ # fetch every listed file into $TMP, one second apart
+ foreach my $id ( sort keys %$checklist )
+ {
+ sleep 1;
+ my $checkfile = "$TMP/$id";
+ my $checkid = $checklist->{$id};
+ my $try2 = 1;
+
+ # download one file, at most $MAXTRIES attempts
+ while(1)
+ {
+ # remove a leftover from a previous attempt
+ unlink $checkfile if -e $checkfile;
+
+ if ( is_success(getstore("$BASEURL/get/$seekid/$id", $checkfile)) && -e $checkfile && -s $checkfile )
+ {
+ # no checksum verification when $ISWINDOWS is set
+ last if $ISWINDOWS;
+
+ my $hashid = getchecksum($checkfile);
+ #print STDERR "[hashid]$hashid [checkid]$checkid\n";
+
+ # an empty $hashid (no checksum could be computed) is accepted as is;
+ # a mismatch triggers another download attempt
+ if ($hashid ne "" && $hashid ne $checkid )
+ {
+ #unlink $checkfile if -e $checkfile;
+ bail("Error retrieving compressed file from server! [Checksum Failed]") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ else
+ {
+ last;
+ }
+ }
+ else
+ {
+ # download failed or produced an empty file: retry after $STIMER seconds
+ bail("Error retrieving compressed file from server!") if $try2 >= $MAXTRIES;
+ $try2++;
+ sleep $STIMER;
+ }
+ }
+ }
+
+ # all listed files were fetched
+ last;
+ }
+ else
+ {
+ # checksum list could not be fetched: retry after $STIMER seconds
+ bail("Error retrieving list of compressed files from server!") if $try1 >= $MAXTRIES;
+ $try1++;
+ sleep $STIMER;
+ }
+ }
+
+ # results are complete, stop polling
+ last;
+
+ }
+ else
+ {
+ # the status request itself failed: report "[http code] message" and stop
+ bail(sprintf("[%d] %s\n", $getResponse->code, parseError($getResponse->content)));
+ }
+
+}
+
+
+# make sure outputs were generated
+# decompress
+print STDERR "Assembling final results...\n";
+
+# The job id comes from the server, so it is quoted before being used in
+# the pattern; otherwise characters such as '.' or '+' in the id would be
+# treated as regex syntax.
+my $outputpattern = qr/^\Q$seekid\E\.out(\.str|\.seq)?/;
+
+foreach my $id ( sort keys %$checklist )
+{
+    next unless $id =~ $outputpattern;
+
+    # optional ".str" / ".seq" suffix of the result file; empty when absent
+    my $suffix = defined $1 ? $1 : "";
+
+    bail("Error: Output file corrupted!") unless -e "$TMP/$id";
+    appendToFile("$TMP/$id", $OUTPUTFILE.$suffix);
+}
+
+cleanup();
+
+
+
+####################
+####################
+
+
+# Split a "<seekid>:<status>" reply into its two fields.
+# Returns the list (status, seekid); both are empty strings when the reply
+# does not consist of exactly two colon-separated fields without whitespace.
+sub parseResponse
+{
+    my ($reply) = @_;
+
+    return ($2, $1) if $reply =~ /^([^\s:]+):([^\s:]+)$/;
+
+    return ("", "");
+}
+
+
+# Interpret the reply of the "isAllowed" query.
+# Returns the number contained in the reply when it consists of digits
+# only, and 100 for any other reply.
+sub parseJobQueryResponse
+{
+    my ($reply) = @_;
+
+    return $reply =~ /^(\d+)$/ ? $1 : 100;
+}
+
+
+# Read a checksum list and return a reference to a hash that maps
+# file name => checksum.
+#
+# Each useful line has the form "<checksum> <filename>"; lines that do not
+# consist of exactly two whitespace-separated fields are ignored.  A file
+# that cannot be opened gives an empty hash.
+sub extractchecksum
+{
+    my $infile = shift;
+    my %dataset = ();
+
+    # three-argument open with a lexical handle: the name is taken literally
+    # and no global bareword handle is left behind
+    open(my $csum, '<', $infile) or return \%dataset;
+
+    while ( my $line = <$csum> )
+    {
+        chomp $line;
+        $dataset{$2} = $1 if $line =~ /^(\S+)\s+(\S+)$/;
+    }
+
+    close $csum;
+
+    return \%dataset;
+}
+
+
+# Extract the message from an error reply.
+# Returns the quoted value of the "error" field when the reply has one,
+# otherwise the reply itself.
+sub parseError
+{
+    my ($reply) = @_;
+
+    # expected form: "error":"Invalid number of inputs found."
+    if ( $reply =~ /\"error\"\s*:\s*\"([^\"]+)\"/ )
+    {
+        return $1;
+    }
+
+    return $reply;
+}
+
+
+# Compute the MD5 checksum of a file with an external tool.
+#
+# Uses /usr/bin/md5sum, or "/sbin/md5 -q" when md5sum is not available.
+# Returns the first whitespace-delimited token of the first non-blank output
+# line, or "" when no tool is found, the tool cannot be started, or it
+# prints nothing.
+#
+# The command is started with a list-form pipe open, so the file name is
+# handed to the tool as a single argument and never passes through a shell.
+# Callers build the name from entries of the server's checksum list, and
+# the previous "$MD5BIN $infile|" form let shell metacharacters in such a
+# name be executed.
+sub getchecksum
+{
+    my $infile = shift;
+
+    # md5 binary check
+    my @md5cmd = ();
+
+    if ( -x "/usr/bin/md5sum" )
+    {
+        @md5cmd = ("/usr/bin/md5sum");
+    }
+    elsif ( -x "/sbin/md5" )
+    {
+        @md5cmd = ("/sbin/md5", "-q");
+    }
+
+    return "" unless @md5cmd;
+
+
+    open(my $md5out, '-|', @md5cmd, $infile) or return "";
+
+    my $checksum = "";
+
+    while ( my $line = <$md5out> )
+    {
+        # the checksum is the first token; the file name that md5sum prints
+        # after it may contain blanks and is ignored
+        if ( $line =~ /^(\S+)/ )
+        {
+            $checksum = $1;
+            last;
+        }
+    }
+
+    close $md5out;
+
+    return $checksum;
+
+}
+
+
+# Run a command line through backticks and discard its output.
+# Returns 0 when $? is -1 after the call and 1 otherwise; the exit status
+# of the command itself is not inspected.
+sub backticks
+{
+    my ($command) = @_;
+
+    qx{$command};
+
+    return 0 if $? == -1;
+    return 1;
+}
+
+
+# Print a message, remove the temporary directory and terminate.
+#
+#   $str    : message written to STDERR (skipped when undefined)
+#   $status : exit status; 0 for success and 1 for error, defaults to 1
+#
+# Does not return.
+sub bail
+{
+    my $str = shift;
+    my $status = shift;
+
+    # A bare "defined" tests $_ rather than $status, so the default has to
+    # name the variable: otherwise an explicit 0 can be replaced by 1 and an
+    # omitted status can stay undefined, depending on the caller's $_.
+    $status = 1 unless defined $status;
+
+    print STDERR "$str\n" if defined $str;
+
+    cleanup();
+
+    exit($status);
+}
+
+
+# Delete the temporary working directory $TMP and everything in it.
+# Does nothing when $TMP is empty or is not an existing directory.
+sub cleanup
+{
+    return unless $TMP ne "" && -d $TMP;
+
+    opendir(MAINDIR, $TMP);
+    my @entries = readdir(MAINDIR);
+    closedir(MAINDIR);
+
+    # remove the directory entries first ...
+    for my $entry (@entries)
+    {
+        my $path = "$TMP/$entry";
+        unlink $path if -e $path;
+    }
+
+    # ... then the directory itself
+    remove_tree($TMP);
+
+}
+
+
+# Append the contents of one file to another.
+#
+#   $inpfile : file to read
+#   $outfile : file to append to (created when it does not exist)
+#
+# Calls bail() when either file cannot be opened or the output cannot be
+# written completely.
+sub appendToFile
+{
+    my $inpfile = shift;
+    my $outfile = shift;
+
+    # three-argument open with lexical handles: the names are taken
+    # literally (no mode characters, no stripped blanks) and the global INPF
+    # handle used by the main script is left alone
+    open(my $in, '<', $inpfile) or bail("Server Error: Error in reading file.");
+    open(my $out, '>>', $outfile) or bail("Server Error: Error in writing to file.");
+
+    while ( my $line = <$in> )
+    {
+        print {$out} $line;
+    }
+
+    # buffered write errors only show up when the handle is closed
+    close($out) or bail("Server Error: Error in writing to file.");
+    close $in;
+}
+
+
+
+# Print the usage text to STDOUT, then hand the given message to bail(),
+# which writes it to STDERR, cleans up and exits.
+#
+#   $str : message explaining why the help is shown (passed on to bail)
+sub help
+{
+ my $str = shift;
+
+ # single-quoted heredoc: the text is printed verbatim, without interpolation
+ print <<'HELPME';
+
+USAGE
+ ./seekquencer_premafft.pl -inp <INFILE> -out <OUTFILE> [-str|-seq]
+ ./seekquencer_premafft.pl -idf <LISTFILE> -seqf <SEQFASTA> -out <OUTFILE> [-str|-seq]
+
+
+PARAMETERS
+ -inp <INFILE>
+ INFILE is a FASTA-formatted file
+ PDB entries are written as:
+ >PDBID
+ [5-character pdbid+chain]
+
+ While sequence entries are written as:
+ >[id]
+ [sequence]
+
+ -idf <LISTFILE>
+ IDLISTFILE is a file containing a list of pdbids
+ pdbids should be a 5-character pdbid + chain
+
+ -seqf <SEQFASTA>
+ SEQFASTA is a fasta file
+ entries are written as:
+ >[id]
+ [sequence]
+
+ -out <OUTFILE>
+ Results are writen to a file named OUTFILE
+
+ -str
+ Only structures will be collected by Seekquencer
+ If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
+
+ -seq
+ Only sequences will be collected by Seekquencer
+ If neither -str nor -seq is set, both structures and sequences will be collected by Seekquencer
+
+
+OPTIONAL PARAMETERS:
+ -seqd <uniref100|uniref90|uniref70|uniprot>
+ Search Database for sequence homologs. Default value: uniref100
+
+ -lim <count>
+ this sets the maximum number of sequence homologs collected. Default value: 100
+
+ -blim <count>
+ this sets the -b and -v value when running blastall. Default value: 100
+
+ -pre
+ When -str is set, this will compare all structures against all using pdp-ash
+ This would ensure that all structures collected are matching
+ All structures that do not match will be removed
+
+ -noin
+ When set, inputs will not be included in the output
+
+ -mod <mafftash|mafftash-split|fasta>
+ Defines the output format
+ mafftash (default) will print a mafftash-formatted fasta file
+ mafftash-split will make 2 files separating the structures (OUTFILE.str) from sequences (OUTFILE.seq)
+ fasta will print a regular fasta file
+
+ -run <thread|normal>
+ thread will run simultaneous jobs during blast queries (faster but takes more nodes)
+ normal will run sequential blast queries (slower but takes less nodes)
+ Default value: normal
+
+ -trd <count>
+ if -run <thread> is defined, this sets the number of parallel jobs to run. Default value: 3
+
+
+HELPME
+
+ # bail() ends the program, so help() does not return
+ bail($str);
+}
+
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
+ kimuraR = myatoi( *++argv );
fprintf( stderr, "kimuraR = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
fprintf( stderr, "blosum %d\n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
fprintf( stderr, "jtt %d\n", pamN );
--argc;
break;
/* modification end. */
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
--argc;
goto nextoption;
case 'Z':
--- /dev/null
+#include "mltaln.h"
+
+#define DEBUG 0
+
+char *directionfile;
+static int show_R_ = 1;
+
+static int subalignment;
+static int subalignmentoffset;
+
+/*
+ * Parse the command line into the file-level option variables.
+ *
+ *   -d FILE   directionfile = FILE
+ *   -i FILE   inputfile = FILE
+ *   -H N      subalignment = 1, subalignmentoffset = N
+ *   -r        show_R_ = 0
+ *
+ * Several flags may follow one '-'.  A flag that takes a value uses the
+ * next argv entry and ends the scan of the current entry.  The program
+ * exits with status 1 after an unknown flag or when arguments are left
+ * over once option parsing has stopped.
+ */
+void arguments( int argc, char *argv[] )
+{
+    int c;
+    int valuetaken;
+
+    inputfile = NULL;
+    directionfile = NULL;
+    subalignment = 0;
+    subalignmentoffset = 0;
+    show_R_ = 1;
+
+    while( --argc > 0 && (*++argv)[0] == '-' )
+    {
+        valuetaken = 0;
+
+        /* walk over the flag characters of this entry; once a flag has
+           consumed the following entry as its value, stop scanning */
+        while( !valuetaken && (c = *++argv[0]) )
+        {
+            if( c == 'd' )
+            {
+                directionfile = *++argv;
+                fprintf( stderr, "directionfile = %s\n", directionfile );
+                --argc;
+                valuetaken = 1;
+            }
+            else if( c == 'i' )
+            {
+                inputfile = *++argv;
+                fprintf( stderr, "inputfile = %s\n", inputfile );
+                --argc;
+                valuetaken = 1;
+            }
+            else if( c == 'H' )
+            {
+                subalignment = 1;
+                subalignmentoffset = myatoi( *++argv );
+                --argc;
+                valuetaken = 1;
+            }
+            else if( c == 'r' )
+            {
+                show_R_ = 0;
+            }
+            else
+            {
+                /* argc = 0 makes the outer loop stop and the check below fail */
+                fprintf( stderr, "illegal option %c\n", c );
+                argc = 0;
+            }
+        }
+    }
+
+    if( argc != 0 )
+    {
+        fprintf( stderr, "options: Check source file !\n" );
+        exit( 1 );
+    }
+}
+
+
+
+/*
+ * Re-orient sequences according to a direction file and print them.
+ *
+ * The sequences are read from the input file (-i) or from stdin.  The
+ * direction file (-d) supplies one line per sequence, starting with "_F"
+ * (keep as is) or "_R" (replace the sequence by the result of sreverse()).
+ * The name of a reversed sequence gets the prefix "_R_" unless -r was
+ * given.  With -H, the sub-alignment table is read as well and the program
+ * stops when members of one sub-alignment have different directions.
+ * The sequences are written to stdout in fasta format.
+ *
+ * Returns 0 on success; exits with status 1 on any error.
+ */
+int main( int argc, char *argv[] )
+{
+    FILE *infp;
+    FILE *difp;
+    int nlenmin;
+    char **name;
+    char **seq;
+    char *tmpseq;
+    char line[100];
+    int *nlen;
+    int i, j;
+    int nsubalignments, maxmem;
+    int **subtable = NULL;
+    int *preservegaps = NULL;
+    char firstdir;
+    char *directions;
+
+    arguments( argc, argv );
+
+    reporterr( "subalignment = %d\n", subalignment );
+    reporterr( "subalignmentoffset = %d\n", subalignmentoffset );
+
+
+    if( inputfile )
+    {
+        infp = fopen( inputfile, "r" );
+        if( !infp )
+        {
+            fprintf( stderr, "Cannot open %s\n", inputfile );
+            exit( 1 );
+        }
+    }
+    else
+        infp = stdin;
+
+    /* The direction file is mandatory.  Without this exit the loop below
+       would read through an uninitialized FILE pointer. */
+    if( !directionfile )
+    {
+        fprintf( stderr, "Give directionfile!\n" );
+        exit( 1 );
+    }
+    difp = fopen( directionfile, "r" );
+    if( !difp )
+    {
+        fprintf( stderr, "Cannot open %s\n", directionfile );
+        exit( 1 );
+    }
+
+
+    dorp = NOTSPECIFIED;
+    getnumlen_casepreserve( infp, &nlenmin );
+
+    fprintf( stderr, "%d x %d - %d %c\n", njob, nlenmax, nlenmin, dorp );
+
+    seq = AllocateCharMtx( njob, nlenmax+1 );
+    tmpseq = AllocateCharVec( MAX( B, nlenmax )+1 );
+    name = AllocateCharMtx( njob, B+1 );
+    nlen = AllocateIntVec( njob );
+
+    /* one direction character ('F' or 'R') per sequence */
+    directions = calloc( njob, sizeof( char ) );
+    if( directions == NULL && njob > 0 )
+    {
+        fprintf( stderr, "Cannot allocate directions\n" );
+        exit( 1 );
+    }
+
+    readData_pointer_casepreserve( infp, name, nlen, seq );
+
+
+    for( i=0; i<njob; i++ )
+    {
+        /* a direction file with fewer lines than sequences is an error;
+           without this check the previous line would be reused silently */
+        if( fgets( line, 99, difp ) == NULL )
+        {
+            fprintf( stderr, "Unexpected end of %s\n", directionfile );
+            exit( 1 );
+        }
+        if( line[0] != '_' )
+        {
+            fprintf( stderr, "Format error!\n" );
+            exit( 1 );
+        }
+        if( line[1] == 'R' )
+        {
+            directions[i] = 'R';
+            sreverse( tmpseq, seq[i] );
+            strcpy( seq[i], tmpseq );
+
+            /* rebuild the name behind its first character, optionally
+               with the "_R_" marker in front.
+               NOTE(review): "_R_" plus up to B-3 copied characters and the
+               terminator end at offset B+1 of name[i]; assuming a row
+               holds B+1 bytes (as requested above) that is one byte too
+               many for a maximum-length name - confirm the longest name
+               readData_pointer_casepreserve can store. */
+            strncpy( tmpseq, name[i]+1, B-3 );
+            tmpseq[B-3] = 0;
+            if( show_R_ )
+            {
+                strcpy( name[i]+1, "_R_" );
+                strcpy( name[i]+4, tmpseq );
+            }
+            else
+            {
+                strcpy( name[i]+1, tmpseq );
+            }
+        }
+        else if( line[1] == 'F' )
+        {
+            directions[i] = 'F';
+        }
+        else
+        {
+            fprintf( stderr, "Format error!\n" );
+            exit( 1 );
+        }
+    }
+    fclose( difp );
+
+    if( subalignment )
+    {
+        /* first call obtains the table dimensions, second call fills the table */
+        readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem );
+        reporterr( "nsubalignments = %d\n", nsubalignments );
+        reporterr( "maxmem = %d\n", maxmem );
+        subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
+        preservegaps = AllocateIntVec( njob );
+        readsubalignmentstable( njob, subtable, preservegaps, NULL, NULL );
+
+        for( j=0; j<nsubalignments; j++ )
+        {
+            reporterr( "Checking directions of sequences in subalignment%d\n", j );
+            firstdir = directions[subtable[j][0]];
+            reporterr( "firstdir = %c\n", firstdir );
+
+            /* each row of subtable ends with a negative entry */
+            for( i=0; subtable[j][i]>-1; i++ )
+            {
+                if( directions[subtable[j][i]] != firstdir )
+                {
+                    reporterr( "\n\n#############################################################################\n" );
+                    reporterr( "\nDirection of nucleotide sequences seems to be inconsistent.\n" );
+                    reporterr( "Please check the following two sequences:\n" );
+                    reporterr( " Sequece no.%d (%s)\n", subtable[j][0]+1, name[subtable[j][0]] );
+                    reporterr( " Sequece no.%d (%s)\n", subtable[j][i]+1, name[subtable[j][i]] );
+                    reporterr( "\nThese sequences are in sub alignment no.%d in your setting of --merge,\nbut their directions seem to be different.\n\n", j+1 );
+                    reporterr( "#############################################################################\n\n\n\n" );
+                    exit( 1 );
+                }
+            }
+            reporterr( "OK!\n" );
+        }
+    }
+
+
+    /* name[i]+1 skips the first character of the stored name */
+    for( i=0; i<njob; i++ )
+    {
+        fprintf( stdout, ">%s\n", name[i]+1 );
+        fprintf( stdout, "%s\n", seq[i] );
+    }
+
+    free( nlen );
+    FreeCharMtx( seq );
+    FreeCharMtx( name );
+    free( tmpseq );
+    free( directions );
+
+    return( 0 );
+}
#define END_OF_VEC -1
-static int maxl;
-static int tsize;
void arguments( int argc, char *argv[] )
{
table[point]++;
}
-int commonsextet_p( short *table, int *pointt )
+static int localcommonsextet_p( short *table, int *pointt )
{
int value = 0;
short tmp;
int *grpseq;
char *tmpseq;
int **pointt;
- static char name[M][B];
+ static char **name;
static int nlen[M];
double **mtx;
double **mtx2;
exit( 1 );
}
+ name = AllocateCharMtx( njob, B+1 );
tmpseq = AllocateCharVec( nlenmax+1 );
seq = AllocateCharMtx( njob, nlenmax+1 );
grpseq = AllocateIntVec( nlenmax+1 );
#if 0
FRead( infp, name, nlen, seq );
#else
- readData( infp, name, nlen, seq );
+ readData_pointer( infp, name, nlen, seq );
#endif
fclose( infp );
for( j=i; j<njob; j++ )
{
- score = (double)commonsextet_p( table1, pointt[j] );
+ score = (double)localcommonsextet_p( table1, pointt[j] );
mtx[i][j] = score;
}
free( table1 );
fp = fopen( "hat2", "w" );
if( !fp ) ErrorExit( "Cannot open hat2." );
- WriteHat2( fp, njob, name, mtx2 );
+ WriteHat2_pointer( fp, njob, name, mtx2 );
fclose( fp );
fprintf( stderr, "\n" );
static int treeout;
static int classsize;
static int picksize;
-static int maxl;
-static int tsize;
static int reorder;
static int pid;
static int maxdepth = 0;
static double tokyoripara;
-static double lenfaca, lenfacb, lenfacc, lenfacd;
#define PLENFACA 0.01
#define PLENFACB 10000
#define PLENFACC 10000
}
}
-#if 0
-static void gappickandx0( char *out, char *in )
-{
- char c;
- if( scoremtx == -1 )
- {
- while( *in )
- {
- if( (c=*in++) == '-' )
- ;
- else if( c == 'u' )
- *out++ = 't';
- else if( amino_n[c] < 4 && amino_n[c] > -1 )
- *out++ = c;
- else
- *out++ = 'n';
- }
- }
- else
- {
- while( *in )
- {
- if( (c=*in++) == '-' )
- ;
- else if( amino_n[c] < 20 && amino_n[c] > -1 )
- *out++ = c;
- else
- *out++ = 'X';
- }
- }
- *out = 0;
-}
-
-static int getkouho( int *pickkouho, double prob, int nin, Scores *scores, char **seq ) // 0 < prob < 1
-{
- int nkouho = 0;
- int i, j;
- int *iptr = pickkouho;
- for( i=1; i<nin; i++ )
- {
- if( ( nkouho==0 || rnd() < prob ) && ( scores[i].shimon != scores->shimon || strcmp( seq[scores->numinseq], seq[scores[i].numinseq] ) ) )
- {
-#if 0
- for( j=0; j<nkouho; j++ )
- {
- if( scores[i].shimon == scores[pickkouho[j]].shimon || !strcmp( seq[scores[pickkouho[j]].numinseq], seq[scores[i].numinseq] ) )
- break;
- }
- if( j == nkouho )
-#endif
- {
- *iptr++ = i;
- nkouho++;
-// fprintf( stderr, "ok! nkouho=%d\n", nkouho );
- }
- }
- else
- {
- ;
-// fprintf( stderr, "no! %d-%d\n", 0, scores[i].numinseq );
- }
- }
- fprintf( stderr, "\ndone\n\n" );
- return nkouho;
-}
-
-#endif
static void getfastascoremtx( int **tmpaminodis )
{
tbrweight = 3;
checkC = 0;
treemethod = 'X';
+ sueff_global = 0.1;
contin = 0;
scoremtx = 1;
kobetsubunkatsu = 0;
dorp = NOTSPECIFIED;
ppenalty = -1530;
ppenalty_ex = NOTSPECIFIED;
+ penalty_shift_factor = 1000.0;
poffset = -123;
kimuraR = NOTSPECIFIED;
pamN = NOTSPECIFIED;
classsize = NOTSPECIFIED;
picksize = NOTSPECIFIED;
tokyoripara = NOTSPECIFIED;
+ legacygapcost = 0;
+ nwildcard = 0;
+ outnumber = 0;
while( --argc > 0 && (*++argv)[0] == '-' )
{
switch( c )
{
case 'p':
- picksize = atoi( *++argv );
+ picksize = myatoi( *++argv );
fprintf( stderr, "picksize = %d\n", picksize );
--argc;
goto nextoption;
case 's':
- classsize = atoi( *++argv );
+ classsize = myatoi( *++argv );
fprintf( stderr, "groupsize = %d\n", classsize );
--argc;
goto nextoption;
// fprintf( stderr, "ppenalty = %d\n", ppenalty );
--argc;
goto nextoption;
+ case 'Q':
+ penalty_shift_factor = atof( *++argv );
+ --argc;
+ goto nextoption;
case 'g':
ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
+ kimuraR = myatoi( *++argv );
fprintf( stderr, "kimuraR = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
// fprintf( stderr, "blosum %d\n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = JTT;
fprintf( stderr, "jtt %d\n", pamN );
--argc;
goto nextoption;
case 'm':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = TM;
fprintf( stderr, "tm %d\n", pamN );
case 'l':
uselongest = 0;
break;
+ case 'n' :
+ outnumber = 1;
+ break;
#if 1
case 'a':
fmodel = 1;
case 'Z':
fromaln = 1;
break;
- case 'L':
+ case 'U':
doalign = 1;
break;
case 'x':
case 'O':
fftNoAnchStop = 1;
break;
+ case 'L':
+ legacygapcost = 1;
+ break;
#if 0
case 'R':
fftRepeatStop = 1;
case 'a':
alg = 'a';
break;
-#endif
case 'R':
alg = 'R';
break;
case 'Q':
alg = 'Q';
break;
+#endif
case 'A':
alg = 'A';
break;
tbutree = 0;
break;
case 'X':
- treemethod = 'X'; // mix
- break;
+ treemethod = 'X'; // tsukawareteiru ????
+ sueff_global = atof( *++argv );
+ fprintf( stderr, "sueff_global = %f\n", sueff_global );
+ --argc;
+ goto nextoption;
case 'E':
treemethod = 'E'; // upg (average)
break;
treemethod = 'q'; // minimum
break;
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
--argc;
goto nextoption;
+ case ':':
+ nwildcard = 1;
+ break;
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
}
}
-static int maxl;
-static int tsize;
+static int nunknown = 0;
int seq_grp_nuc( int *grp, char *seq )
{
if( tmp < 4 )
*grp++ = tmp;
else
- fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
+ nunknown++;
}
*grp = END_OF_VEC;
return( grp-grpbk );
if( tmp < 6 )
*grp++ = tmp;
else
- fprintf( stderr, "WARNING : Unknown character %c\r", *(seq-1) );
+ nunknown++;
}
*grp = END_OF_VEC;
return( grp-grpbk );
table[point]++;
}
-int commonsextet_p( short *table, int *pointt )
+static int localcommonsextet_p( short *table, int *pointt )
{
int value = 0;
short tmp;
{
int l, len1, len2;
int clus1, clus2;
- float pscore, tscore;
+ double pscore, tscore;
static int *fftlog;
static char *indication1, *indication2;
static double *effarr1 = NULL;
static double *effarr2 = NULL;
static char **mseq1, **mseq2;
- float dumfl = 0.0;
+// double dumfl = 0.0;
+ double dumdb = 0.0;
int ffttry;
int m1, m2;
#if 0
}
#if WEIGHT
- clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1 );
- clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2 );
+ clus1 = fastconjuction_noname( mem1, seq, mseq1, effarr1, weight, indication1, 0.0 );
+ clus2 = fastconjuction_noname( mem2, seq, mseq2, effarr2, weight, indication2, 0.0 );
#else
clus1 = fastconjuction_noweight( mem1, seq, mseq1, effarr1, indication1 );
clus2 = fastconjuction_noweight( mem2, seq, mseq2, effarr2, indication2 );
fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode\n", len1, len2 );
alg = 'M';
if( commonIP ) FreeIntMtx( commonIP );
+ commonIP = 0;
commonAlloc1 = 0;
commonAlloc2 = 0;
}
{
fprintf( stderr, "\bm" );
// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
+ pscore = Falign_udpari_long( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
}
else
{
// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+ pscore = Falign( NULL, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
}
}
else
case( 'M' ):
fprintf( stderr, "\bm" );
// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
- break;
- case( 'Q' ):
- if( clus1 == 1 && clus2 == 1 )
- {
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
- }
- else
- {
-// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- }
+ pscore = MSalignmm( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
break;
case( 'A' ):
if( clus1 == 1 && clus2 == 1 )
{
// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = G__align11( mseq1, mseq2, *alloclen, outgap, outgap );
+ pscore = G__align11( n_dis_consweight_multi, mseq1, mseq2, *alloclen, outgap, outgap );
}
else
{
// fprintf( stderr, "%d-%d", clus1, clus2 );
- pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ pscore = A__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1 );
}
break;
default:
int selfscore0;
double **dfromc;
double **dfromcp;
- float **pickmtx;
- float **yukomtx;
+ double **pickmtx;
+ double **yukomtx;
static short *table1;
Scores **outs, *ptr;
int *numin;
int ***topol;
int *treeorder;
int picktmp;
- float **len;
+ double **len;
double minscore;
// double *minscoreinpick;
- float *hanni;
+ double *hanni;
double lenfac;
double longer;
double shorter;
static char **mseq2 = NULL;
double *blastresults = NULL; // by Mathog, a guess
static int palloclen = 0;
- float maxdist;
+ double maxdist;
if( orderpos == NULL )
orderpos = order;
}
free( tmptree );
- *tree = (char *)calloc( treelen + nin + 5, sizeof( char ) );
- if( nin > 1 ) **tree = '(';
- else **tree = '\0';
-// **tree = '\0';
+ *tree = (char *)calloc( treelen + nin + 15, sizeof( char ) );
+ **tree = '\n';
+ if( nin > 1 )
+ {
+ *(*tree+1) = '(';
+ *(*tree+2) = '\0';
+ }
+ else
+ {
+ *(*tree+1) = '\0';
+ }
for( j=0; j<nin-1; j++ )
{
sprintf( *tree+strlen( *tree ), "%d,", scores[j].numinseq+1 );
}
sprintf( *tree+strlen( *tree ), "%d", scores[j].numinseq+1 );
- if( nin > 1 ) strcat( *tree, ")" );
+ if( nin > 1 ) strcat( *tree, ")\n" );
+ else strcat( *tree, "\n" );
// fprintf( stdout, "*tree = %s\n", *tree );
}
{
if( fromaln )
{
-// scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
+// scores[i].score = ( 1.0 - (double)G__align11_noalign( n_disLN, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
scores[i].score = ( 1.0 - (double)naivepairscore11( orialn[scores[i].numinseq], orialn[scores->numinseq], penalty ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
}
else
if( *depthpt == 0 ) fprintf( stderr, "\r%d / %d ", i, nin );
gappick0( mseq2[0], seq[scores[i].numinseq] );
// fprintf( stdout, "### before calc scores[%d] = %f (%c)\n", i, scores[i].score, qinoya == scores->numinseq?'o':'x' );
- scores[i].score = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
+ scores[i].score = ( 1.0 - (double)G__align11_noalign( n_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[i].selfscore ) ) * 1;
// fprintf( stderr, "scores[i] = %f\n", scores[i].score );
// fprintf( stderr, "m1=%s\n", seq[scores[0].numinseq] );
// fprintf( stderr, "m2=%s\n", seq[scores[i].numinseq] );
}
else
{
- scores[i].score = ( 1.0 - (double)commonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
+ scores[i].score = ( 1.0 - (double)localcommonsextet_p( table1, scores[i].pointt ) / MIN( selfscore0, scores[i].selfscore ) ) * lenfac;
if( scores[i].score > MAX6DIST ) scores[i].score = MAX6DIST;
}
// if( i ) fprintf( stderr, "%d-%d d %4.2f len %d %d\n", 1, i+1, scores[i].score, scores->orilen, scores[i].orilen );
{
if( s_p_map[j] != -1 )
{
- pickmtx[0][s_p_map[j]] = (float)scores[j].score;
+ pickmtx[0][s_p_map[j]] = (double)scores[j].score;
// fprintf( stderr, "pickmtx[0][%d] = %f\n", s_p_map[j], pickmtx[0][s_p_map[j]] );
}
}
{
// fprintf( stderr, "\r%d / %d ", i, nin );
gappick0( mseq2[0], seq[scores[picks[i]].numinseq] );
- pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
+ pickmtx[j][i-j] = ( 1.0 - (double)G__align11_noalign( n_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * 1;
// fprintf( stderr, "scores[picks[i]] = %f\n", scores[picks[i]].score );
}
}
}
else
{
- pickmtx[j][i-j] = ( 1.0 - (double)commonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac;
+ pickmtx[j][i-j] = ( 1.0 - (double)localcommonsextet_p( table1, scores[picks[i]].pointt ) / MIN( selfscore0, scores[picks[i]].selfscore ) ) * lenfac;
if( pickmtx[j][i-j] > MAX6DIST ) pickmtx[j][i-j] = MAX6DIST;
}
fprintf( stderr, "DIANA!!\n" );
if( npick > 2 )
{
- float avdist;
- float avdist1;
- float avdist2;
- float maxavdist;
+ double avdist;
+ double avdist1;
+ double avdist2;
+ double maxavdist;
int splinter;
int count;
int dochokoho;
}
}
if( count < 1 ) avdist1 = 0.0;
- else avdist1 /= (float)count;
+ else avdist1 /= (double)count;
fprintf( stderr, "docho %d (%dinori), avdist1 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist1 );
count = 0;
}
}
if( count < 1 ) avdist2 = 0.0;
- else avdist2 /= (float)count;
+ else avdist2 /= (double)count;
fprintf( stderr, "docho %d (%dinori), avdist2 = %f\n", dochokoho, p_o_map[dochokoho] + 1, avdist2 );
if( avdist2 < avdist1 )
if( npick > 2 )
{
#if 0
- float avdist;
- float maxavdist;
+ double avdist;
+ double maxavdist;
int count;
int splinter;
maxavdist = 0.0;
if( tsukau[i] == 0 ) continue;
for( j=i+1; j<npick; j++ )
{
-// float kijun = maxdist * 1/(npick-2);
-// float kijun = maxavdist * tokyoripara;
- float kijun;
+// double kijun = maxdist * 1/(npick-2);
+// double kijun = maxavdist * tokyoripara;
+ double kijun;
kijun = maxdist * tokyoripara; // atode kakunin
// fprintf( stderr, "%d-%d\n", i, j );
// fprintf( stderr, "maxdist = %f\n", maxdist );
else
{
gappick0( mseq2[0], seq[scores[j].numinseq] );
- dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( amino_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
+ dfromc[i][j] = ( 1.0 - (double)G__align11_noalign( n_disLN, -1200, -60, mseq1, mseq2, palloclen ) / MIN( selfscore0, scores[j].selfscore ) ) * 1;
}
}
}
else
{
- dfromc[i][j] = ( 1.0 - (double)commonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
+ dfromc[i][j] = ( 1.0 - (double)localcommonsextet_p( table1, scores[j].pointt ) / MIN( selfscore0, scores[j].selfscore ) ) * lenfac;
if( dfromc[i][j] > MAX6DIST ) dfromc[i][j] = MAX6DIST;
}
}
if( nyuko > 2 )
{
fprintf( stderr, "upgma " );
-// veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
- fixed_musclesupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len, NULL );
+// veryfastsupg_double_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
+ fixed_musclesupg_double_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len, NULL, 1, 1 );
fprintf( stderr, "\r \r" );
}
else
mem2 = AllocateIntVec( njob+1 );
}
-// veryfastsupg_float_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
+// veryfastsupg_double_realloc_nobk_halfmtx( nyuko, yukomtx, topol, len );
-// counteff_simple_float( nyuko, topol, len, eff );
+// counteff_simple_double( nyuko, topol, len, eff );
nlim = nyuko-1;
// fprintf( stdout, ">%s\n", name[i] );
// fprintf( stdout, "%s\n", seq[i] );
}
+ if( nunknown ) fprintf( stderr, "\nThere are %d ambiguous characters\n", nunknown );
// exit( 1 );
#if 0
pscore = 0;
for( pt=seq[i]; *pt; pt++ )
{
+// pscore += amino_dis[(int)*pt][(int)*pt];
pscore += amino_dis[(int)*pt][(int)*pt];
}
scores[i].selfscore = pscore;
table1 = (short *)calloc( tsize, sizeof( short ) );
if( !table1 ) ErrorExit( "Cannot allocate table1\n" );
makecompositiontable_p( table1, pointt[i] );
- scores[i].selfscore = commonsextet_p( table1, pointt[i] );
+ scores[i].selfscore = localcommonsextet_p( table1, pointt[i] );
free( table1 );
}
}
{
int i;
int j;
- float wm;
+ double wm;
struct _shuryoten *next;
struct _shuryoten *prev;
} Shuryoten;
else return( 0 );
}
-static void match_calc( float *match, char **s1, char **s2, int i1, int lgth2 )
+static void match_calc( double *match, char **s1, char **s2, int i1, int lgth2 )
{
int j;
match[j] = amino_dis[(int)(*s1)[i1]][(int)(*s2)[j]];
}
-static float gentracking( int **used,
+static double gentracking( int **used,
char **seq1, char **seq2,
char **mseq1, char **mseq2,
- float **cpmx1, float **cpmx2,
+ double **cpmx1, double **cpmx2,
int **ijpi, int **ijpj, int *off1pt, int *off2pt, int endi, int endj )
{
int l, iin, jin, lgth1, lgth2, k, limk;
}
-float suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt )
+double suboptalign11( char **seq1, char **seq2, int alloclen, int *off1pt, int *off2pt, LocalHom *lhmpt )
/* score no keisan no sai motokaraaru gap no atukai ni mondai ga aru */
{
int k;
int lasti, lastj; /* outgap == 0 -> lgth1, outgap == 1 -> lgth1+1 */
int lgth1, lgth2;
int resultlen;
- float wm = 0.0; // by D.Mathog,
- float g;
- float *currentw, *previousw;
+ double wm = 0.0; // by D.Mathog,
+ double g;
+ double *currentw, *previousw;
#if 1
- float *wtmp;
+ double *wtmp;
int *ijpipt;
int *ijpjpt;
- float *mjpt, *Mjpt, *prept, *curpt;
+ double *mjpt, *Mjpt, *prept, *curpt;
int *mpjpt, *Mpjpt;
#endif
- static float mi, *m;
- static float Mi, *largeM;
+ static double mi, *m;
+ static double Mi, *largeM;
static int **ijpi;
static int **ijpj;
static int mpi, *mp;
static int Mpi, *Mp;
- static float *w1, *w2;
-// static float *match;
- static float *initverticalw; /* kufuu sureba iranai */
- static float *lastverticalw; /* kufuu sureba iranai */
+ static double *w1, *w2;
+// static double *match;
+ static double *initverticalw; /* kufuu sureba iranai */
+ static double *lastverticalw; /* kufuu sureba iranai */
static char **mseq1;
static char **mseq2;
- static float **cpmx1;
- static float **cpmx2;
+ static double **cpmx1;
+ static double **cpmx2;
static int **intwork;
- static float **floatwork;
+ static double **doublework;
static int orlgth1 = 0, orlgth2 = 0;
- float maxwm;
- float tbk;
+ double maxwm;
+ double tbk;
int tbki, tbkj;
int endali, endalj;
-// float localthr = 0.0;
-// float localthr2 = 0.0;
- float fpenalty = (float)penalty;
- float fpenalty_OP = (float)penalty_OP;
- float fpenalty_ex = (float)penalty_ex;
-// float fpenalty_EX = (float)penalty_EX;
- float foffset = (float)offset;
- float localthr = -foffset;
- float localthr2 = -foffset;
+// double localthr = 0.0;
+// double localthr2 = 0.0;
+ double fpenalty = (double)penalty;
+ double fpenalty_OP = (double)penalty_OP;
+ double fpenalty_ex = (double)penalty_ex;
+// double fpenalty_EX = (double)penalty_EX;
+ double foffset = (double)offset;
+ double localthr = -foffset;
+ double localthr2 = -foffset;
static Shuryoten *shuryo = NULL;
int numshuryo;
- float minshuryowm = 0.0; // by D.Mathog
+ double minshuryowm = 0.0; // by D.Mathog
int minshuryopos = 0; // by D.Mathog
- float resf;
+ double resf;
// fprintf( stderr, "@@@@@@@@@@@@@ penalty_OP = %f, penalty_EX = %f, pelanty = %f\n", fpenalty_OP, fpenalty_EX, fpenalty );
FreeFloatMtx( cpmx2 );
fprintf( stderr, "in suboptalign11 step 1.7\n" );
- FreeFloatMtx( floatwork );
+ FreeFloatMtx( doublework );
FreeIntMtx( intwork );
}
largeM = AllocateFloatVec( ll2+2 );
Mp = AllocateIntVec( ll2+2 );
- cpmx1 = AllocateFloatMtx( 26, ll1+2 );
- cpmx2 = AllocateFloatMtx( 26, ll2+2 );
+ cpmx1 = AllocateFloatMtx( nalphabets, ll1+2 );
+ cpmx2 = AllocateFloatMtx( nalphabets, ll2+2 );
- floatwork = AllocateFloatMtx( 26, MAX( ll1, ll2 )+2 );
- intwork = AllocateIntMtx( 26, MAX( ll1, ll2 )+2 );
+ doublework = AllocateFloatMtx( nalphabets, MAX( ll1, ll2 )+2 );
+ intwork = AllocateIntMtx( nalphabets, MAX( ll1, ll2 )+2 );
mseq1 = AllocateCharMtx( njob, ll1+ll2 );
mseq2 = AllocateCharMtx( njob, ll1+ll2 );
fprintf( stderr, "k=%d, shuryo[k].i,j,wm=%d,%d,%f go\n", k, shuryo[k].i, shuryo[k].j, shuryo[k].wm );
resf = gentracking( used, seq1, seq2, mseq1, mseq2, cpmx1, cpmx2, ijpi, ijpj, off1pt, off2pt, shuryo[k].i, shuryo[k].j );
if( resf == -1.0 ) continue;
- putlocalhom3( mseq1[0], mseq2[0], lhmpt, *off1pt, *off2pt, (int)shuryo[k].wm, strlen( mseq1[0] ) );
+ putlocalhom3( mseq1[0], mseq2[0], lhmpt, *off1pt, *off2pt, (int)shuryo[k].wm, strlen( mseq1[0] ), 'h' );
#if 0
fprintf( stderr, "\n" );
fprintf( stderr, ">\n%s\n", mseq1[0] );
#define DEBUG 0
#define IODEBUG 0
#define SCOREOUT 0
+#define SHISHAGONYU 0 // for debug
static int nadd;
static int treein;
static int treeout;
static int distout;
static int noalign;
+static int multidist;
+static int subalignment;
+static int subalignmentoffset;
+static int keeplength;
+static int ndeleted;
+static int mapout;
+static int smoothing;
+static int specifictarget;
+static int callpairlocalalign;
+static int outputhat23;
+static int compacttree = 0;
typedef struct _jobtable
{
int j;
} Jobtable;
+typedef struct _msacompactdistmtxthread_arg // argument bundle for msacompactdisthalfmtxthread(); also used in single-thread runs
+{
+ int njob; // exclusive upper bound of the i/j sequence indices; the worker stops at row njob-1
+ int thread_no; // only printed in the worker's progress message
+ int *selfscore; // per-sequence values handed to distcompact_msa()
+ double **partmtx; // rows may be NULL; non-NULL rows receive the pairwise distances
+ char **seq; // sequences, indexed by i and j
+ int **skiptable; // per-sequence tables handed to distcompact_msa()
+ double *mindist; // running minimum of (distance + preference) for each sequence
+ int *mindistfrom; // index of the partner that produced the current mindist[] entry
+ int *jobpospt; // next row to process; read and incremented by the worker
+#ifdef enablemultithread
+ pthread_mutex_t *mutex; // locked around the *jobpospt read/increment when nthread is non-zero
+#endif
+} msacompactdistmtxthread_arg_t;
+
#ifdef enablemultithread
typedef struct _distancematrixthread_arg
{
int njob;
int thread_no;
- float *selfscore;
- float **iscore;
+ int *selfscore;
+ double **iscore;
char **seq;
+ int **skiptable;
Jobtable *jobpospt;
pthread_mutex_t *mutex;
} distancematrixthread_arg_t;
RNApair ***singlerna;
double *effarr_kozo;
int *fftlog;
+ char *mergeoralign;
+ int *targetmap;
pthread_mutex_t *mutex;
pthread_cond_t *treecond;
} treebasethread_arg_t;
#endif
-void arguments( int argc, char *argv[] )
+static void arguments( int argc, char *argv[], int *pac, char **pav, int *tac, char **tav ) // 2 kai yobaremasu.
{
int c;
+ int i;
nthread = 1;
outnumber = 0;
scoreout = 0;
+ spscoreout = 0;
treein = 0;
topin = 0;
rnaprediction = 'm';
tbrweight = 3;
checkC = 0;
treemethod = 'X';
+ sueff_global = 0.1;
contin = 0;
scoremtx = 1;
kobetsubunkatsu = 0;
- dorp = NOTSPECIFIED;
+// dorp = NOTSPECIFIED;
+ ppenalty_dist = NOTSPECIFIED;
ppenalty = NOTSPECIFIED;
+ penalty_shift_factor = 1000.0;
ppenalty_ex = NOTSPECIFIED;
poffset = NOTSPECIFIED;
kimuraR = NOTSPECIFIED;
TMorJTT = JTT;
consweight_multi = 1.0;
consweight_rna = 0.0;
+ multidist = 0;
+ subalignment = 0;
+ subalignmentoffset = 0;
+ legacygapcost = 0;
+ specificityconsideration = 0.0;
+ keeplength = 0;
+ mapout = 0;
+ smoothing = 0;
+ specifictarget = 0;
+ callpairlocalalign = 0;
+ outputhat23 = 0;
+ nwildcard = 0;
+
+ if( pac )
+ {
+ pav[0] = "tbfast-pair";
+ *pac = 1;
+ tav[0] = "tbfast";
+ *tac = 1;
+
+ for( i=0; i<argc; i++ )
+ {
+ if( argv[i][0] == '_' )
+ {
+ callpairlocalalign = 1;
+// reporterr( "start\n" );
+
+ for( i++; i<argc; i++ )
+ {
+ if( argv[i][0] == '_' )
+ {
+// reporterr( "end\n" );
+ goto pavend;
+ }
+ pav[*pac] = argv[i];
+ *pac += 1;
+// reporterr( "%s\n", argv[i] );
+ }
+ }
+ }
+
+
+ pavend:
+
+// reporterr( "i=%d\n", i );
+ for( i++; i<argc; i++ )
+ {
+ tav[*tac] = argv[i];
+ *tac += 1;
+ }
+
+ argc -= *pac + 1;
+ argv += *pac + 1;
+
+// reporterr( "argc in tbfast = %d\n", argc );
+// reporterr( "*pac in tbfast = %d\n", *pac );
+// for( i=0; i<*tac; i++ ) reporterr( "%s\n", tav[i] );
+ }
+ else
+ {
+// reporterr( "SECOND TIME\n" );
+ }
+
+// reporterr( "*argv = %s\n", *argv );
while( --argc > 0 && (*++argv)[0] == '-' )
{
+// reporterr( "(*argv)[0] = %s\n", (*argv) );
while ( ( c = *++argv[0] ) )
{
+// reporterr( "c=%c\n", c );
switch( c )
{
case 'i':
inputfile = *++argv;
- fprintf( stderr, "inputfile = %s\n", inputfile );
+// fprintf( stderr, "inputfile = %s\n", inputfile );
--argc;
goto nextoption;
case 'I':
- nadd = atoi( *++argv );
- fprintf( stderr, "nadd = %d\n", nadd );
+ nadd = myatoi( *++argv );
+// fprintf( stderr, "nadd = %d\n", nadd );
--argc;
goto nextoption;
case 'e':
RNAppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
--argc;
goto nextoption;
+ case 'V':
+ ppenalty_dist = (int)( atof( *++argv ) * 1000 - 0.5 );
+// fprintf( stderr, "ppenalty = %d\n", ppenalty );
+ --argc;
+ goto nextoption;
case 'f':
ppenalty = (int)( atof( *++argv ) * 1000 - 0.5 );
// fprintf( stderr, "ppenalty = %d\n", ppenalty );
--argc;
goto nextoption;
+ case 'Q':
+ penalty_shift_factor = atof( *++argv );
+ --argc;
+ goto nextoption;
case 'g':
ppenalty_ex = (int)( atof( *++argv ) * 1000 - 0.5 );
- fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
+// fprintf( stderr, "ppenalty_ex = %d\n", ppenalty_ex );
--argc;
goto nextoption;
case 'h':
--argc;
goto nextoption;
case 'k':
- kimuraR = atoi( *++argv );
- fprintf( stderr, "kappa = %d\n", kimuraR );
+ kimuraR = myatoi( *++argv );
+// fprintf( stderr, "kappa = %d\n", kimuraR );
--argc;
goto nextoption;
case 'b':
- nblosum = atoi( *++argv );
+ nblosum = myatoi( *++argv );
scoremtx = 1;
- fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
+// fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
--argc;
goto nextoption;
case 'j':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = JTT;
- fprintf( stderr, "jtt/kimura %d\n", pamN );
+// fprintf( stderr, "jtt/kimura %d\n", pamN );
--argc;
goto nextoption;
case 'm':
- pamN = atoi( *++argv );
+ pamN = myatoi( *++argv );
scoremtx = 0;
TMorJTT = TM;
- fprintf( stderr, "tm %d\n", pamN );
+// fprintf( stderr, "tm %d\n", pamN );
--argc;
goto nextoption;
case 'l':
--argc;
goto nextoption;
case 'C':
- nthread = atoi( *++argv );
- fprintf( stderr, "nthread = %d\n", nthread );
+ nthread = myatoi( *++argv );
+// fprintf( stderr, "nthread = %d\n", nthread );
+ --argc;
+#ifndef enablemultithread
+ nthread = 0;
+#endif
+ goto nextoption;
+ case 's':
+ specificityconsideration = (double)myatof( *++argv );
+// fprintf( stderr, "specificityconsideration = %f\n", specificityconsideration );
--argc;
goto nextoption;
case 'R':
rnaprediction = 'r';
- break;
- case 's':
- RNAscoremtx = 'r';
- break;
#if 1
case 'a':
fmodel = 1;
case 'P':
dorp = 'p';
break;
+ case 'L':
+ legacygapcost = 1;
+ break;
#if 1
case 'O':
outgap = 0;
fftNoAnchStop = 1;
break;
#endif
- case 'S':
- scoreout = 1;
+#if 0
+ case 'S' :
+ scoreout = 1; // for checking parallel calculation
+ break;
+#else
+ case 'S' :
+ spscoreout = 1; // 2014/Dec/30, sp score
break;
+#endif
+ case 'H':
+ subalignment = 1;
+ subalignmentoffset = myatoi( *++argv );
+ --argc;
+ goto nextoption;
#if 0
case 'e':
fftscore = 0;
#endif
case 'X':
treemethod = 'X';
- break;
+ sueff_global = atof( *++argv );
+// fprintf( stderr, "sueff_global = %f\n", sueff_global );
+ --argc;
+ goto nextoption;
case 'E':
treemethod = 'E';
break;
case 'a':
alg = 'a';
break;
-#endif
+ case 'H':
+ alg = 'H';
+ break;
case 'Q':
alg = 'Q';
break;
- case 'H':
- alg = 'H';
+#endif
+ case '@':
+ alg = 'd';
break;
case 'A':
alg = 'A';
case 'N':
nevermemsave = 1;
break;
- case 'B':
+ case 'B': // hitsuyou! memopt -M -B no tame
break;
case 'F':
use_fft = 1;
case 'U':
treein = 1;
break;
+#if 0
case 'V':
topin = 1;
break;
+#endif
case 'u':
tbrweight = 0;
weight = 0;
tbrweight = 3;
break;
case 'd':
+ multidist = 1;
+ break;
+#if 0
+ case 'd':
disp = 1;
break;
+#endif
/* Modified 01/08/27, default: user tree */
case 'J':
tbutree = 0;
break;
/* modification end. */
case 'z':
- fftThreshold = atoi( *++argv );
+ fftThreshold = myatoi( *++argv );
--argc;
goto nextoption;
case 'w':
- fftWinSize = atoi( *++argv );
+ fftWinSize = myatoi( *++argv );
+ --argc;
+ goto nextoption;
+ case 'W':
+ minimumweight = atof( *++argv );
+// fprintf( stderr, "minimumweight = %f\n", minimumweight );
--argc;
goto nextoption;
+#if 0
case 'Z':
checkC = 1;
break;
+#endif
+ case 'Y':
+ keeplength = 1;
+ break;
+ case 'Z':
+ mapout = 1;
+ break;
+ case 'p':
+ smoothing = 1;
+ break;
+ case '=':
+ specifictarget = 1;
+ break;
+ case ':':
+ nwildcard = 1;
+ break;
+ case '+':
+ outputhat23 = myatoi( *++argv );
+// fprintf( stderr, "outputhat23 = %f\n", outputhat23 );
+ --argc;
+ goto nextoption;
default:
fprintf( stderr, "illegal option %c\n", c );
argc = 0;
nextoption:
;
}
+
+// reporterr( "argc=%d\n", argc );
+
if( argc == 1 )
{
cut = atof( (*argv) );
}
if( argc != 0 )
{
- fprintf( stderr, "options: Check source file !\n" );
+ fprintf( stderr, "tbfast options: Check source file !\n" );
exit( 1 );
}
if( tbitr == 1 && outgap == 0 )
distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
int njob = targ->njob;
int thread_no = targ->thread_no;
- float *selfscore = targ->selfscore;
- float **iscore = targ->iscore;
+ double *selfscore = targ->selfscore;
+ double **iscore = targ->iscore;
char **seq = targ->seq;
Jobtable *jobpospt = targ->jobpospt;
- float ssi, ssj, bunbo;
+ double ssi, ssj, bunbo;
int i, j;
while( 1 )
ssi = selfscore[i];
if( i % 10 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
- for( j=i+1; j<njob; j++)
+ for( j=i+1; j<njob; j++ )
{
ssj = selfscore[j];
bunbo = MIN( ssi, ssj );
if( bunbo == 0.0 )
iscore[i][j-i] = 1.0;
else
- iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty ) / bunbo;
+ iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / bunbo;
}
}
}
#endif
+static double preferenceval( int ori, int pos, int max ) // for debug: returns a tiny offset proportional to the wrapped offset of pos from ori
+{
+ pos -= ori; // offset of pos relative to ori
+ if( pos < 0 ) pos += max; // wrap a negative offset once by max
+ return( 0.00000000000001 * pos ); // scale by 1e-14
+}
+
+static void *msacompactdisthalfmtxthread( void *arg ) // worker computing distances for pairs (i, j>i), one row i per pass; also used when enablemultithread is off
+{
+ msacompactdistmtxthread_arg_t *targ = (msacompactdistmtxthread_arg_t *)arg; // arg carries the msacompactdistmtxthread_arg_t bundle
+ int njob = targ->njob;
+ int thread_no = targ->thread_no;
+ int *selfscore = targ->selfscore;
+ double **partmtx = targ->partmtx;
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ double *mindist = targ->mindist;
+ int *mindistfrom = targ->mindistfrom;
+ int *jobpospt = targ->jobpospt;
+ double tmpdist, preference, tmpdistx, tmpdisty;
+ int i, j;
+
+ while( 1 ) // each pass claims one row i from *jobpospt; returns NULL when row njob-1 is reached
+ {
+#ifdef enablemultithread
+ if( nthread ) // threaded run: claim the next row under the mutex
+ {
+ pthread_mutex_lock( targ->mutex );
+ i = *jobpospt;
+ if( i == njob-1 ) // the last row has no j > i, so nothing is left to do
+ {
+ pthread_mutex_unlock( targ->mutex );
+ return( NULL );
+ }
+ *jobpospt = i+1;
+ pthread_mutex_unlock( targ->mutex );
+ }
+ else
+#endif
+ { // nthread == 0, or built without thread support: same claim logic, no locking
+ i = *jobpospt;
+ if( i == njob-1 )
+ {
+ return( NULL );
+ }
+ *jobpospt = i+1;
+ }
+
+ if( i % 100 == 0 ) // progress report every 100 rows
+ {
+ if( nthread )
+ fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no );
+ else
+ fprintf( stderr, "\r% 5d / %d", i, njob );
+ }
+
+ for( j=i+1; j<njob; j++ )
+ {
+ tmpdist = distcompact_msa( seq[i], seq[j], skiptable[i], skiptable[j], selfscore[i], selfscore[j] ); // slow, though
+
+ preference = preferenceval( i, j, njob ); // tiny offset for the pair as seen from i
+ tmpdistx = tmpdist + preference;
+ if( tmpdistx < mindist[i] ) // keep the smallest offset distance seen for i and the partner that gave it
+ {
+ mindist[i] = tmpdistx;
+ mindistfrom[i] = j;
+ }
+
+ preference = preferenceval( j, i, njob ); // tiny offset for the same pair as seen from j
+ tmpdisty = tmpdist + preference;
+ if( tmpdisty < mindist[j] ) // same bookkeeping for j
+ {
+ mindist[j] = tmpdisty;
+ mindistfrom[j] = i;
+ }
+ if( partmtx[i] ) partmtx[i][j] = tmpdist; // the plain distance (without offset) is stored, only in rows that are non-NULL
+ if( partmtx[j] ) partmtx[j][i] = tmpdist;
+ }
+ }
+}
+
+
#ifdef enablemultithread
-static void *distancematrixthread( void *arg )
+#if 0
+static void *distancematrixthread( void *arg ) // v7.2 ijou deha tsukawanaihazu
{
distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg;
int njob = targ->njob;
int thread_no = targ->thread_no;
- float *selfscore = targ->selfscore;
- float **iscore = targ->iscore;
+ double *selfscore = targ->selfscore;
+ double **iscore = targ->iscore;
char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
Jobtable *jobpospt = targ->jobpospt;
- float ssi, ssj, bunbo;
+ double ssi, ssj, bunbo;
int i, j;
while( 1 )
ssj = selfscore[j];
bunbo = MIN( ssi, ssj );
if( bunbo == 0.0 )
- iscore[i][j-i] = 1.0;
+ iscore[i][j-i] = 2.0; // 2013/Oct/17
else
- iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty ) / bunbo;
+// iscore[i][j-i] = ( 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / bunbo ) * 2.0; // 2013/Oct/17
+ iscore[i][j-i] = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast
+ if( iscore[i][j-i] > 10 ) iscore[i][j-i] = 10.0; // 2015/Mar/17
}
}
+#else
+static void *distancematrixthread( void *arg ) // worker filling iscore[i][j-i] for j>i, one row i per pass; should not be used in v7.2 and later
+{
+ distancematrixthread_arg_t *targ = (distancematrixthread_arg_t *)arg; // arg carries the distancematrixthread_arg_t bundle
+ int njob = targ->njob;
+ int thread_no = targ->thread_no;
+ int *selfscore = targ->selfscore;
+ double **iscore = targ->iscore;
+ char **seq = targ->seq;
+ int **skiptable = targ->skiptable;
+ Jobtable *jobpospt = targ->jobpospt;
+
+ int ssi, ssj, bunbo;
+ int i, j;
+
+ while( 1 ) // each pass claims one row i under the mutex; returns NULL when row njob-1 is reached
+ {
+ pthread_mutex_lock( targ->mutex );
+ i = jobpospt->i; // with (jobpospt->i)++ the increment would happen before the termination check, so the loop would never stop
+
+ if( i == njob-1 ) // the last row has no j > i, so nothing is left to do
+ {
+ pthread_mutex_unlock( targ->mutex );
+ return( NULL );
+ }
+ jobpospt->i += 1;
+ pthread_mutex_unlock( targ->mutex );
+ if( i % 100 == 0 ) fprintf( stderr, "\r% 5d / %d (thread %4d)", i, njob, thread_no ); // progress report every 100 rows
+
+ ssi = selfscore[i];
+ for( j=i+1; j<njob; j++ )
+ {
+ ssj = selfscore[j];
+ bunbo = MIN( ssi, ssj ); // denominator: the smaller of the two self scores
+ if( bunbo == 0 ) // avoid dividing by zero; use the fixed value 2.0 instead
+ iscore[i][j-i] = 2.0; // 2013/Oct/17
+ else
+// iscore[i][j-i] = ( 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / bunbo ) * 2.0; // 2013/Oct/17
+ iscore[i][j-i] = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast
+ if( iscore[i][j-i] > 10.0 ) iscore[i][j-i] = 10.0; // 2015/Mar/17: cap the stored value at 10.0 (applies to both branches above)
+ }
+ }
+}
+#endif
static void *treebasethread( void *arg )
{
treebasethread_arg_t *targ = (treebasethread_arg_t *)arg;
RNApair ***singlerna = targ->singlerna;
double *effarr_kozo = targ->effarr_kozo;
int *fftlog = targ->fftlog;
+ int *targetmap = targ->targetmap;
+ char *mergeoralign = targ->mergeoralign;
char **mseq1, **mseq2;
char **localcopy;
int i, j, l;
int len1, len2;
int clus1, clus2;
- float pscore;
+ double pscore;
char *indication1, *indication2;
double *effarr1 = NULL;
double *effarr2 = NULL;
double *effarr1_kozo = NULL;
double *effarr2_kozo = NULL;
LocalHom ***localhomshrink = NULL;
+ char *swaplist = NULL;
int m1, m2;
- float dumfl = 0.0;
+// double dumfl = 0.0;
+ double dumdb = 0.0;
int ffttry;
- RNApair ***grouprna1, ***grouprna2;
+ RNApair ***grouprna1 = NULL, ***grouprna2 = NULL;
+ double **dynamicmtx;
mseq1 = AllocateCharMtx( njob, 0 );
mseq2 = AllocateCharMtx( njob, 0 );
localcopy = calloc( njob, sizeof( char * ) );
+ dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
if( rnakozo && rnaprediction == 'm' )
{
indication2 = AllocateCharVec( 150 );
#if 0
#else
+ swaplist = NULL;
if( constraint )
{
+ if( specifictarget ) swaplist = calloc( njob, sizeof( char ) );
localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) );
- for( i=0; i<njob; i++)
+ for( i=0; i<njob; i++ )
localhomshrink[i] = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
}
#endif
pthread_mutex_unlock( targ->mutex );
if( commonIP ) FreeIntMtx( commonIP );
commonIP = NULL;
- Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
- A__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ Falign_udpari_long( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL );
+ A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+ D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ partA__align( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL );
+ G__align11( NULL, NULL, NULL, 0, 0, 0 ); // iru?
free( mseq1 );
free( mseq2 );
free( localcopy );
free( effarr2_kozo );
free( indication1 );
free( indication2 );
+ FreeDoubleMtx( dynamicmtx );
+ if( rnakozo && rnaprediction == 'm' )
+ {
+ if( grouprna1 ) free( grouprna1 ); // nakami ha?
+ if( grouprna2 ) free( grouprna2 ); // nakami ha?
+ grouprna1 = grouprna2 = NULL;
+ }
if( constraint )
{
- for( i=0; i<njob; i++)
- free( localhomshrink[i] );
- free( localhomshrink );
+ if( localhomshrink ) // nen no tame
+ {
+ for( i=0; i<njob; i++ )
+ {
+ free( localhomshrink[i] );
+ localhomshrink[i] = NULL;
+ }
+ free( localhomshrink );
+ localhomshrink = NULL;
+ }
+ if( specifictarget ) free( swaplist );
}
return( NULL );
}
// pthread_mutex_unlock( targ->mutex );
+ if( mergeoralign[l] == 'n' )
+ {
+// fprintf( stderr, "SKIP!\n" );
+ dep[l].done = 1;
+ (*nrunpt)--;
+ pthread_cond_broadcast( targ->treecond );
+ free( topol[l][0] );
+ free( topol[l][1] );
+ free( topol[l] );
+ pthread_mutex_unlock( targ->mutex );
+ continue;
+ }
+
+
m1 = topol[l][0][0];
m2 = topol[l][1][0];
+// fprintf( stderr, "\ndistfromtip = %f\n", dep[l].distfromtip );
+// makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip - 0.5 );
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip );
+
// pthread_mutex_lock( targ->mutex );
+
+
len1 = strlen( aseq[m1] );
len2 = strlen( aseq[m2] );
if( *alloclen <= len1 + len2 )
pthread_mutex_unlock( targ->mutex );
+
+
if( effarr_kozo )
{
clus1 = fastconjuction_noname_kozo( topol[l][0], localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
clus2 = fastconjuction_noname_kozo( topol[l][1], localcopy, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
}
+#if 0
+ else if( specifictarget )
+ {
+ clus1 = fastconjuction_target( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1, minimumweight, targetmap );
+ clus2 = fastconjuction_target( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2, minimumweight, targetmap );
+ }
+#endif
else
{
- clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1 );
- clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2 );
+ clus1 = fastconjuction_noname( topol[l][0], localcopy, mseq1, effarr1, effarr, indication1, minimumweight );
+ clus2 = fastconjuction_noname( topol[l][1], localcopy, mseq2, effarr2, effarr, indication2, minimumweight );
}
-
-
#if 1
fprintf( stderr, "\rSTEP % 5d /%d (thread %4d) ", l+1, njob-1, thread_no );
#else
if( constraint )
{
- fastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
+ if( specifictarget )
+ fastshrinklocalhom_target( topol[l][0], topol[l][1], localhomtable, localhomshrink, swaplist, targetmap );
+ else
+ fastshrinklocalhom_half( topol[l][0], topol[l][1], localhomtable, localhomshrink );
// msfastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
// fprintf( stderr, "localhomshrink =\n" );
// outlocalhompt( localhomshrink, clus1, clus2 );
fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 );
alg = 'M';
if( commonIP ) FreeIntMtx( commonIP );
+ commonIP = NULL;
commonAlloc1 = 0;
commonAlloc2 = 0;
}
if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 );
else ffttry = 0;
// ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708
-// fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
+// fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 );
// fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] );
if( constraint == 2 )
{
fprintf( stderr, "c" );
if( alg == 'A' )
{
- imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, topol[l][0], topol[l][1] );
if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
- pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1 );
}
- else if( alg == 'H' )
+ if( alg == 'd' )
{
- imp_match_init_strictH( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
+ imp_match_init_strictD( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, topol[l][0], topol[l][1] );
+ if( rnakozo ) imp_rnaD( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
+ pscore = D__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
}
else if( alg == 'Q' )
{
- imp_match_init_strictQ( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
- pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
- }
- else if( alg == 'R' )
- {
- imp_match_init_strictR( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
+ fprintf( stderr, "Not supported\n" );
+ exit( 1 );
}
}
else if( force_fft || ( use_fft && ffttry ) )
if( alg == 'M' )
{
fprintf( stderr, "m" );
- pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
+ pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
}
else
- pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+ pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
}
else
{
break;
case( 'M' ):
fprintf( stderr, "m" );
- pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
break;
case( 'A' ):
- pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
- break;
- case( 'Q' ):
- pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+ pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, -1, -1 );
break;
- case( 'R' ):
- pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- break;
- case( 'H' ):
- pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+ case( 'd' ):
+ pscore = D__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
break;
default:
ErrorExit( "ERROR IN SOURCE FILE" );
if( disp ) display( localcopy, njob );
+
+
+
pthread_mutex_lock( targ->mutex );
dep[l].done = 1;
(*nrunpt)--;
pthread_cond_broadcast( targ->treecond );
-// pthread_mutex_unlock( targ->mutex );
-// pthread_mutex_lock( targ->mutex );
-
for( i=0; (j=topol[l][0][i])!=-1; i++ )
strcpy( aseq[j], localcopy[j] );
for( i=0; (j=topol[l][1][i])!=-1; i++ )
strcpy( aseq[j], localcopy[j] );
+
pthread_mutex_unlock( targ->mutex );
for( i=0; (j=topol[l][0][i])!=-1; i++ )
}
#endif
-void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo )
+void treebase( int *nlen, char **aseq, int nadd, char *mergeoralign, char **mseq1, char **mseq2, int ***topol, Treedep *dep, double *effarr, int *alloclen, LocalHom **localhomtable, RNApair ***singlerna, double *effarr_kozo, int *targetmap, int *targetmapr, int ntarget )
{
int i, l, m;
int len1nocommongap, len2nocommongap;
int len1, len2;
int clus1, clus2;
- float pscore, tscore;
- static char *indication1, *indication2;
- static double *effarr1 = NULL;
- static double *effarr2 = NULL;
- static double *effarr1_kozo = NULL;
- static double *effarr2_kozo = NULL;
- static LocalHom ***localhomshrink = NULL;
- static int *fftlog;
+ double pscore, tscore;
+ char *indication1, *indication2;
+ double *effarr1 = NULL;
+ double *effarr2 = NULL;
+ double *effarr1_kozo = NULL;
+ double *effarr2_kozo = NULL;
+ LocalHom ***localhomshrink = NULL;
+ char *swaplist = NULL;
+ int *fftlog;
int m1, m2;
- static int *gaplen;
- static int *gapmap;
- static int *alreadyaligned;
- float dumfl = 0.0;
+ int *gaplen;
+ int *gapmap;
+ int *alreadyaligned;
+// double dumfl = 0.0;
+ double dumdb = 0.0;
int ffttry;
- RNApair ***grouprna1, ***grouprna2;
+ RNApair ***grouprna1 = NULL, ***grouprna2 = NULL;
+ static double **dynamicmtx;
+ int gapmaplen;
if( rnakozo && rnaprediction == 'm' )
{
grouprna1 = grouprna2 = NULL;
}
- if( effarr1 == NULL )
- {
- fftlog = AllocateIntVec( njob );
- effarr1 = AllocateDoubleVec( njob );
- effarr2 = AllocateDoubleVec( njob );
- indication1 = AllocateCharVec( 150 );
- indication2 = AllocateCharVec( 150 );
- gaplen = AllocateIntVec( *alloclen+10 );
- gapmap = AllocateIntVec( *alloclen+10 );
- alreadyaligned = AllocateIntVec( njob );
+ fftlog = AllocateIntVec( njob );
+ effarr1 = AllocateDoubleVec( njob );
+ effarr2 = AllocateDoubleVec( njob );
+ indication1 = AllocateCharVec( 150 );
+ indication2 = AllocateCharVec( 150 );
+ gaplen = AllocateIntVec( *alloclen+10 );
+ gapmap = AllocateIntVec( *alloclen+10 );
+ alreadyaligned = AllocateIntVec( njob );
+ dynamicmtx = AllocateDoubleMtx( nalphabets, nalphabets );
#if 0
#else
- if( constraint )
- {
- localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) );
- for( i=0; i<njob; i++)
- localhomshrink[i] = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
- }
-#endif
- effarr1_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
- effarr2_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
- for( i=0; i<njob; i++ ) effarr1_kozo[i] = 0.0;
- for( i=0; i<njob; i++ ) effarr2_kozo[i] = 0.0;
+ swaplist = NULL;
+ if( constraint )
+ {
+ if( specifictarget ) swaplist = calloc( njob, sizeof( char ) );
+ localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) );
+ for( i=0; i<njob; i++ )
+ localhomshrink[i] = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
}
+#endif
+ effarr1_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
+ effarr2_kozo = AllocateDoubleVec( njob ); //tsuneni allocate sareru.
+ for( i=0; i<njob; i++ ) effarr1_kozo[i] = 0.0;
+ for( i=0; i<njob; i++ ) effarr2_kozo[i] = 0.0;
for( i=0; i<njob-nadd; i++ ) alreadyaligned[i] = 1;
for( i=njob-nadd; i<njob; i++ ) alreadyaligned[i] = 0;
#endif
+
if( constraint )
- calcimportance( njob, effarr, aseq, localhomtable );
+ {
+// calcimportance( njob, effarr, aseq, localhomtable );
+// dontcalcimportance( njob, effarr, aseq, localhomtable ); // CHUUIII!!!!!
+ if( specifictarget )
+ calcimportance_target( njob, ntarget, effarr, aseq, localhomtable, targetmap, targetmapr );
+// dontcalcimportance_target( njob, effarr, aseq, localhomtable, ntarget ); // CHUUIII!!!!!
+ else
+// calcimportance( njob, effarr, aseq, localhomtable );
+ calcimportance_half( njob, effarr, aseq, localhomtable );
+ }
// writePre( njob, name, nlen, aseq, 0 );
tscore = 0.0;
for( l=0; l<njob-1; l++ )
{
+// fprintf( stderr, "\ndistfromtip = %f\n", dep[l].distfromtip );
+ makedynamicmtx( dynamicmtx, n_dis_consweight_multi, dep[l].distfromtip );
+// makedynamicmtx( dynamicmtx, n_dis_consweight_multi, ( dep[l].distfromtip - 0.2 ) * 3 );
if( mergeoralign[l] == 'n' )
{
// fprintf( stderr, "SKIP!\n" );
clus1 = fastconjuction_noname_kozo( topol[l][0], aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
clus2 = fastconjuction_noname_kozo( topol[l][1], aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
}
+#if 0
+ else if( specifictarget )
+ {
+ clus1 = fastconjuction_target( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, minimumweight, targetmap );
+ clus2 = fastconjuction_target( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, minimumweight, targetmap );
+ }
+#endif
else
{
- clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1 );
- clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2 );
+ clus1 = fastconjuction_noname( topol[l][0], aseq, mseq1, effarr1, effarr, indication1, minimumweight );
+ clus2 = fastconjuction_noname( topol[l][1], aseq, mseq2, effarr2, effarr, indication2, minimumweight );
}
- if( mergeoralign[l] == '1' || mergeoralign[l] == '2' )
+ if( mergeoralign[l] == '1' || mergeoralign[l] == '2' ) // only in serial version
{
newgapstr = "=";
}
if( constraint )
{
- fastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
+ if( specifictarget )
+ fastshrinklocalhom_target( topol[l][0], topol[l][1], localhomtable, localhomshrink, swaplist, targetmap );
+ else
+ fastshrinklocalhom_half( topol[l][0], topol[l][1], localhomtable, localhomshrink );
// msfastshrinklocalhom( topol[l][0], topol[l][1], localhomtable, localhomshrink );
// fprintf( stderr, "localhomshrink =\n" );
// outlocalhompt( localhomshrink, clus1, clus2 );
}
+
/*
fprintf( stderr, "before align all\n" );
display( aseq, njob );
fprintf( stderr, "\nlen1=%d, len2=%d, Switching to the memsave mode.\n", len1, len2 );
alg = 'M';
if( commonIP ) FreeIntMtx( commonIP );
+ commonIP = NULL;
commonAlloc1 = 0;
commonAlloc2 = 0;
}
if( fftlog[m1] && fftlog[m2] ) ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 1000 && clus2 < 1000 );
else ffttry = 0;
// ffttry = ( nlen[m1] > clus1 && nlen[m2] > clus2 && clus1 < 5000 && clus2 < 5000 ); // v6.708
-// fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (float)len1/fftlog[m1], clus1, (float)len2/fftlog[m2], clus2 );
+// fprintf( stderr, "f=%d, len1/fftlog[m1]=%f, clus1=%d, len2/fftlog[m2]=%f, clus2=%d\n", ffttry, (double)len1/fftlog[m1], clus1, (double)len2/fftlog[m2], clus2 );
// fprintf( stderr, "f=%d, clus1=%d, fftlog[m1]=%d, clus2=%d, fftlog[m2]=%d\n", ffttry, clus1, fftlog[m1], clus2, fftlog[m2] );
if( constraint == 2 )
{
fprintf( stderr, "c" );
if( alg == 'A' )
{
- imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ imp_match_init_strict( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, topol[l][0], topol[l][1] );
if( rnakozo ) imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
- pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, topol[l][0][0], 1 ); // reuse profiles
}
- else if( alg == 'H' )
+ if( alg == 'd' )
{
- imp_match_init_strictH( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
+ imp_match_init_strictD( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, topol[l][0], topol[l][1] );
+ if( rnakozo ) imp_rnaD( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
+ pscore = D__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
}
else if( alg == 'Q' )
{
- imp_match_init_strictQ( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, NULL, NULL, NULL );
- pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
- }
- else if( alg == 'R' )
- {
- imp_match_init_strictR( NULL, clus1, clus2, strlen( mseq1[0] ), strlen( mseq2[0] ), mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, localhomshrink, &dumfl, NULL, NULL, NULL, NULL );
+ fprintf( stderr, "Not supported\n" );
+ exit( 1 );
}
}
else if( force_fft || ( use_fft && ffttry ) )
if( alg == 'M' )
{
fprintf( stderr, "m" );
- pscore = Falign_udpari_long( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1 );
+ pscore = Falign_udpari_long( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1 );
}
else
- pscore = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
+ pscore = Falign( NULL, NULL, dynamicmtx, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, *alloclen, fftlog+m1, NULL, 0, NULL );
}
else
{
break;
case( 'M' ):
fprintf( stderr, "m" );
- pscore = MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
+ pscore = MSalignmm( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
break;
case( 'A' ):
- pscore = A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
- break;
- case( 'Q' ):
- pscore = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
- break;
- case( 'R' ):
- pscore = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+ pscore = A__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap, topol[l][0][0], 1 ); // reuse profiles
break;
- case( 'H' ):
- pscore = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumfl, NULL, NULL, NULL, NULL );
+ case( 'd' ):
+ pscore = D__align( dynamicmtx, mseq1, mseq2, effarr1, effarr2, clus1, clus2, *alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
break;
default:
ErrorExit( "ERROR IN SOURCE FILE" );
if( mergeoralign[l] == '1' ) // jissainiha nai. atarashii hairetsu ha saigo dakara.
{
- adjustgapmap( strlen( mseq2[0] )-len2nocommongap+len2, gapmap, mseq2[0] );
- restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
- findnewgaps( clus2, mseq2, gaplen );
- insertnewgaps( njob, alreadyaligned, aseq, topol[l][1], topol[l][0], gaplen, gapmap, *alloclen, alg );
- for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
- for( i=0; (m=topol[l][0][i])>-1; i++ ) alreadyaligned[m] = 1;
+ reporterr( "Check source!!\n" );
+ exit( 1 );
}
if( mergeoralign[l] == '2' )
{
// fprintf( stderr, ">mseq1[0] = \n%s\n", mseq1[0] );
// fprintf( stderr, ">mseq2[0] = \n%s\n", mseq2[0] );
- adjustgapmap( strlen( mseq1[0] )-len1nocommongap+len1, gapmap, mseq1[0] );
- restorecommongaps( njob, aseq, topol[l][0], topol[l][1], gapmap, *alloclen );
- findnewgaps( clus1, mseq1, gaplen );
- insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg );
+// if( keeplength ) ndeleted += deletenewinsertions( clus1, clus2, mseq1, mseq2, NULL );
+ gapmaplen = strlen( mseq1[0] )-len1nocommongap+len1;
+ adjustgapmap( gapmaplen, gapmap, mseq1[0] );
+ if( smoothing )
+ {
+ restorecommongapssmoothly( njob, njob-(clus1+clus2), aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
+ findnewgaps( clus1, 0, mseq1, gaplen );
+ insertnewgaps_bothorders( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, gapmaplen, *alloclen, alg, '-' );
+ }
+ else
+ {
+ restorecommongaps( njob, njob-(clus1+clus2), aseq, topol[l][0], topol[l][1], gapmap, *alloclen, '-' );
+ findnewgaps( clus1, 0, mseq1, gaplen );
+ insertnewgaps( njob, alreadyaligned, aseq, topol[l][0], topol[l][1], gaplen, gapmap, *alloclen, alg, '-' );
+ }
+#if 0
for( i=0; i<njob; i++ ) eq2dash( aseq[i] );
+ for( i=0; i<clus1; i++ ) eq2dash( mseq1[i] );
+ for( i=0; i<clus2; i++ ) eq2dash( mseq2[i] );
+#else
+ eq2dashmatometehayaku( mseq1, clus1 );
+ eq2dashmatometehayaku( mseq2, clus2 );
+#endif
for( i=0; (m=topol[l][1][i])>-1; i++ ) alreadyaligned[m] = 1;
}
free( topol[l][1] );
free( topol[l] );
}
+ free( topol[l] );
#if SCOREOUT
fprintf( stderr, "totalscore = %10.2f\n\n", tscore );
#endif
+ if( rnakozo && rnaprediction == 'm' )
+ {
+ if( grouprna1 ) free( grouprna1 ); // nakami ha?
+ if( grouprna2 ) free( grouprna2 ); // nakami ha?
+ grouprna1 = grouprna2 = NULL;
+ }
+ if( constraint )
+ {
+ if( localhomshrink ) // nen no tame
+ {
+ for( i=0; i<njob; i++ )
+ {
+ free( localhomshrink[i] );
+ localhomshrink[i] = NULL;
+ }
+ free( localhomshrink );
+ localhomshrink = NULL;
+ }
+ if( specifictarget ) free( swaplist );
+ }
+
+ free( topol );
+ free( fftlog );
+ free( effarr1 );
+ free( effarr2 );
+ free( indication1 );
+ free( indication2 );
+ free( gaplen );
+ free( gapmap );
+ free( alreadyaligned );
+ FreeDoubleMtx( dynamicmtx );
+ free( effarr1_kozo );
+ free( effarr2_kozo );
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
+ D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 );
+ A__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0, -1, -1 );
+ imp_match_init_strictD( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL );
+ imp_match_init_strict( NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL );
+ FreeCommonIP();
}
static void WriteOptions( FILE *fp )
fflush( fp );
}
+/*
+ * preparepartmtx: allocate the row-pointer table for an nseq-row matrix
+ * of doubles.
+ *
+ * nseq     number of rows to reserve pointers for.
+ * returns  a calloc'ed array of nseq row pointers, every one of them NULL;
+ *          no row storage is allocated by the active code path.  The
+ *          caller releases the table with free().
+ *
+ * If the table cannot be allocated for a positive nseq, an error is
+ * written to stderr and the process exits with status 1 (the original
+ * code would have dereferenced the NULL pointer in the loop below).
+ * For nseq <= 0 the calloc() result is returned unchanged, as before.
+ *
+ * The "#if 0" section is disabled code that would pre-allocate full rows
+ * until the running total exceeds maxdistmtxsize when compacttree == 1.
+ * It is never compiled and is kept for reference only.
+ */
+static double **preparepartmtx( int nseq )
+{
+	int i;
+	double **val;
+	double size; // running byte count; only read by the disabled code below
+
+	val = (double **)calloc( nseq, sizeof( double *) );
+	if( val == NULL && nseq > 0 )
+	{
+		fprintf( stderr, "Cannot allocate memory for %d row pointers in preparepartmtx()\n", nseq );
+		exit( 1 );
+	}
+	size = 0;
+
+#if 0
+	if( compacttree == 1 )
+	{
+		for( i=0; i<nseq; i++ )
+		{
+			size += (double)sizeof( double ) * nseq;
+			if( size > maxdistmtxsize )
+			{
+				reporterr( "\n\nThe size of full distance matrix is estimated to exceed %.2fGB.\n", maxdistmtxsize / 1000 / 1000 /1000 );
+				reporterr( "Will try the calculation using a %d x %d matrix.\n", nseq, i );
+				reporterr( "This calculation will be slow due to the limited RAM space.\n" );
+				reporterr( "To avoid the slowdown, please try '--initialramusage xGB' (x>>%.2f),\n", maxdistmtxsize / 1000 / 1000 /1000 );
+				reporterr( "if larger RAM space is available.\n" );
+				reporterr( "Note that xGB is NOT the upper limit of RAM usage.\n" );
+				reporterr( "Two to three times larger space may be used for building a guide tree.\n" );
+				reporterr( "Memory usage of the MSA stage depends on similarity of input sequences.\n\n" );
+//				reporterr( "If the RAM is small, try '--initialramusage xGB' with a smaller x value.\n" );
+				reporterr( "The '--memsavetree' option uses smaller RAM space.\n" );
+				reporterr( "If tree-like relationship can be ignored, try '--pileup' or '--randomchain'.\n\n" );
+				reporterr( "The result of --initialramusage xGB is almost identical to the default, except for rounding differences.\n" );
+
+				reporterr( "In the cases of --memsavetree, --pileup and --randomchain, the result differs from the default.\n\n" );
+				break;
+			}
+			val[i] = (double *)calloc( nseq, sizeof( double ) );
+		}
+		if( i == nseq ) reporterr( "The full matrix will be used.\n" );
+
+		for( ;i<nseq; i++ ) val[i] = NULL; // just to be safe
+	}
+	else
+#endif
+	{
+		for( i=0; i<nseq; i++ ) val[i] = NULL; // just to be safe; calloc() already zero-filled the table
+	}
+	return( val );
+}
+
int main( int argc, char *argv[] )
{
- static int *nlen;
- static float *selfscore;
+ static int *nlen = NULL;
+ static int *selfscore = NULL;
int nogaplen;
- static char **name, **seq;
- static char **mseq1, **mseq2;
- static char **bseq;
- static float **iscore, **iscore_kozo;
- static double *eff, *eff_kozo, *eff_kozo_mapped = NULL;
+ static char **name = NULL, **seq = NULL;
+ static char **mseq1 = NULL, **mseq2 = NULL;
+ static char **bseq = NULL;
+ static double **iscore = NULL, **iscore_kozo = NULL;
+ int **skiptable;
+ static double *eff = NULL, *eff_kozo = NULL, *eff_kozo_mapped = NULL;
int i, j, ien, ik, jk;
- static int ***topol, ***topol_kozo;
+ static int ***topol = NULL, ***topol_kozo = NULL;
static int *addmem;
- static Treedep *dep;
- static float **len, **len_kozo;
- FILE *prep;
- FILE *infp;
- FILE *orderfp;
- FILE *hat2p;
+ static Treedep *dep = NULL;
+ static double **len = NULL, **len_kozo = NULL;
+ FILE *prep = NULL;
+ FILE *infp = NULL;
+ FILE *orderfp = NULL;
+ FILE *hat2p = NULL;
double unweightedspscore;
int alignmentlength;
- char *mergeoralign;
+ char *mergeoralign = NULL;
int foundthebranch;
-
+ int nsubalignments, maxmem;
+ int **subtable;
+ int *insubtable;
+ int *preservegaps;
+ char ***subalnpt;
+ char *originalgaps = NULL;
+ char **addbk = NULL;
+ int **deletelist = NULL;
+ FILE *dlf = NULL;
+// for compacttree
+ int *mindistfrom = NULL;
+ double *mindist = NULL;
+ double **partmtx = NULL;
+// for compacttree
+
char c;
int alloclen;
LocalHom **localhomtable = NULL;
- RNApair ***singlerna;
- float ssi, ssj, bunbo;
- static char *kozoarivec;
+ LocalHom *tmpptr;
+ RNApair ***singlerna = NULL;
+ double ssi, ssj, bunbo;
+ static char *kozoarivec = NULL;
int nkozo;
+ int ntarget;
+ int *targetmap = NULL, *targetmapr = NULL;
+ int ilim, jst, jj;
+ int pac, tac;
+ char **pav, **tav;
- arguments( argc, argv );
-#ifndef enablemultithread
- nthread = 0;
-#endif
+ pav = calloc( argc, sizeof( char * ) );
+ tav = calloc( argc, sizeof( char * ) );
+
+ arguments( argc, argv, &pac, pav, &tac, tav );
+
+ if( fastathreshold < 0.0001 ) constraint = 0;
if( inputfile )
{
rewind( infp );
+
nkozo = 0;
if( njob < 2 )
exit( 1 );
}
+ if( subalignment )
+ {
+ readsubalignmentstable( njob, NULL, NULL, &nsubalignments, &maxmem );
+ fprintf( stderr, "nsubalignments = %d\n", nsubalignments );
+ fprintf( stderr, "maxmem = %d\n", maxmem );
+ subtable = AllocateIntMtx( nsubalignments, maxmem+1 );
+ insubtable = AllocateIntVec( njob );
+ for( i=0; i<njob; i++ ) insubtable[i] = 0;
+ preservegaps = AllocateIntVec( njob );
+ for( i=0; i<njob; i++ ) preservegaps[i] = 0;
+ subalnpt = AllocateCharCub( nsubalignments, maxmem, 0 );
+ readsubalignmentstable( njob, subtable, preservegaps, NULL, NULL );
+ }
+
seq = AllocateCharMtx( njob, nlenmax+1 );
mseq1 = AllocateCharMtx( njob, 0 );
mseq2 = AllocateCharMtx( njob, 0 );
name = AllocateCharMtx( njob, B+1 );
nlen = AllocateIntVec( njob );
- selfscore = AllocateFloatVec( njob );
+ selfscore = AllocateIntVec( njob );
topol = AllocateIntCub( njob, 2, 0 );
len = AllocateFloatMtx( njob, 2 );
- iscore = AllocateFloatHalfMtx( njob );
eff = AllocateDoubleVec( njob );
kozoarivec = AllocateCharVec( njob );
dep = (Treedep *)calloc( njob, sizeof( Treedep ) );
if( nadd ) addmem = AllocateIntVec( nadd+1 );
- if( constraint )
+
+ if( tbutree ) iscore = AllocateFloatHalfMtx( njob ); // tbutree=0 no toki aln kara mtx wo keisan, compacttree dehanaitoki nomi iscore shiyou.
+
+ ndeleted = 0;
+
+#if 0
+ readData( infp, name, nlen, seq );
+#else
+ readData_pointer( infp, name, nlen, seq );
+ fclose( infp );
+#endif
+
+ if( specifictarget )
+ {
+ targetmap = calloc( njob, sizeof( int ) );
+ ntarget = 0;
+ for( i=0; i<njob; i++ )
+ {
+ targetmap[i] = -1;
+ if( !strncmp( name[i]+1, "_focus_", 7 ) )
+ targetmap[i] = ntarget++;
+ }
+ targetmapr = calloc( ntarget, sizeof( int ) );
+ for( i=0; i<njob; i++ )
+ if( targetmap[i] != -1 ) targetmapr[targetmap[i]] = i;
+
+ }
+ else
+ {
+ ntarget = njob;
+ targetmap = calloc( njob, sizeof( int ) );
+ targetmapr = calloc( njob, sizeof( int ) );
+ for( i=0; i<njob; i++ )
+ targetmap[i] = targetmapr[i] = i;
+ }
+
+#if 0
+ for( i=0; i<njob; i++ )
+ reporterr( "targetmap[%d] = %d\n", i, targetmap[i] );
+ for( i=0; i<ntarget; i++ )
+ reporterr( "targetmapr[%d] = %d\n", i, targetmapr[i] );
+#endif
+
+// if( constraint && !noalign ) // 2016mar15 noalign tsuika
+ if( constraint ) // 2016Jul31 noalign no toki no shori (l=0.0) ha mafft.tmpl ni idou
{
- localhomtable = (LocalHom **)calloc( njob, sizeof( LocalHom *) );
- for( i=0; i<njob; i++)
+
+ ilim = njob;
+ localhomtable = (LocalHom **)calloc( ntarget, sizeof( LocalHom *) );
+ for( i=0; i<ntarget; i++ )
{
- localhomtable[i] = (LocalHom *)calloc( njob, sizeof( LocalHom ) );
- for( j=0; j<njob; j++)
+ localhomtable[i] = (LocalHom *)calloc( ilim, sizeof( LocalHom ) );
+ for( j=0; j<ilim; j++ )
{
localhomtable[i][j].start1 = -1;
localhomtable[i][j].end1 = -1;
localhomtable[i][j].opt = -1.0;
localhomtable[i][j].importance = -1.0;
localhomtable[i][j].next = NULL;
+ localhomtable[i][j].nokori = 0;
+ localhomtable[i][j].extended = -1;
+ localhomtable[i][j].last = localhomtable[i]+j;
localhomtable[i][j].korh = 'h';
}
+ if( !specifictarget ) ilim--;
+ }
+
+// reporterr( "pac=%d\n", pac );
+// reporterr( "pav[0]=%s\n", pav[0] );
+ if( callpairlocalalign )
+ {
+ pairlocalalign( njob, nlenmax, name, seq, iscore, localhomtable, pac, pav );
+ arguments( tac, tav, NULL, NULL, NULL, NULL ); // anzen no tame
+ callpairlocalalign = 1; // wakarinikui.
+ if( fastathreshold < 0.0001 ) constraint = 0;
+// fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
+// fprintf( stderr, "scoremtx=%d\n", scoremtx );
+// fprintf( stderr, "fastathreshold=%f\n", fastathreshold );
+// fprintf( stderr, "constraing=%d\n", constraint );
+//exit( 1 );
+ for( ilim=njob, i=0; i<ntarget; i++ )
+ {
+ for( j=0; j<ilim; j++ )
+ {
+ for( tmpptr=localhomtable[i]+j; tmpptr; tmpptr=tmpptr->next )
+ {
+ if( tmpptr->opt == -1.0 ) continue;
+#if SHISHAGONYU // for debug
+ char buff[100];
+ sprintf( buff, "%10.5f", tmpptr->opt );
+ tmpptr->opt = 0.0;
+ sscanf( buff, "%lf", &(tmpptr->opt) );
+#endif
+ tmpptr->opt = ( tmpptr->opt ) / 5.8 * 600;
+ }
+ }
+ if( !specifictarget ) ilim--;
+ }
+
+ prep = fopen( "hat3.seed", "r" );
+ if( prep )
+ {
+ fprintf( stderr, "Loading 'hat3.seed' ... " );
+ if( specifictarget ) readlocalhomtable2_target( prep, njob, localhomtable, kozoarivec, targetmap ); // uwagakisarerukara koredehadame.
+ else readlocalhomtable2_half( prep, njob, localhomtable, kozoarivec ); // uwagakisarerukara koredehadame.
+ fclose( prep );
+ fprintf( stderr, "\ndone.\n" );
+ }
+ else
+ fprintf( stderr, "No hat3.seed.\n" );
+
+ if( outputhat23 )
+ {
+ prep = fopen( "hat3", "w" );
+ if( !prep ) ErrorExit( "Cannot open hat3 to write." );
+
+ fprintf( stderr, "Writing hat3 for iterative refinement\n" );
+ if( specifictarget )
+ ilim = ntarget;
+ else
+ ilim = njob-1;
+ for( i=0; i<ilim; i++ )
+ {
+ if( specifictarget )
+ {
+ jst = 0;
+ jj = 0;
+ }
+ else
+ {
+ jst = i;
+ jj = 0;
+ }
+ for( j=jst; j<njob; j++, jj++ )
+ {
+ for( tmpptr=localhomtable[i]+jj; tmpptr; tmpptr=tmpptr->next )
+ {
+ if( tmpptr->opt == -1.0 ) continue;
+ if( targetmap[j] == -1 || targetmap[i] < targetmap[j] )
+ fprintf( prep, "%d %d %d %7.5f %d %d %d %d %c\n", targetmapr[i], j, tmpptr->overlapaa, tmpptr->opt/600*5.8, tmpptr->start1, tmpptr->end1, tmpptr->start2, tmpptr->end2, tmpptr->korh );
+ }
+ }
+ }
+ fclose( prep );
+
+ prep = fopen( "hat2", "w" );
+ WriteFloatHat2_pointer_halfmtx( prep, njob, name, iscore );
+ fclose( prep );
+ }
+ else if( distout ) // choufuku shiterukedo, muda deha nai.
+ {
+ prep = fopen( "hat2", "w" );
+ WriteFloatHat2_pointer_halfmtx( prep, njob, name, iscore );
+ fclose( prep );
+ }
+ }
+ else
+ {
+ fprintf( stderr, "Loading 'hat3' ... " );
+ prep = fopen( "hat3", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat3." );
+ if( specifictarget ) readlocalhomtable2_target( prep, njob, localhomtable, kozoarivec, targetmap );
+ else readlocalhomtable2_half( prep, njob, localhomtable, kozoarivec );
+ fclose( prep );
+ fprintf( stderr, "\ndone.\n" );
}
- fprintf( stderr, "Loading 'hat3' ... " );
- prep = fopen( "hat3", "r" );
- if( prep == NULL ) ErrorExit( "Make hat3." );
- readlocalhomtable( prep, njob, localhomtable, kozoarivec );
- fclose( prep );
- fprintf( stderr, "\ndone.\n" );
nkozo = 0;
}
-// outlocalhom( localhomtable, njob );
+#if 0
+ if( specifictarget )
+ outlocalhom_target( localhomtable, ntarget, njob );
+ else
+ outlocalhom_half( localhomtable, njob );
+ exit( 1 );
+#endif
#if 0
fprintf( stderr, "Extending localhom ... " );
fprintf( stderr, "done.\n" );
#endif
}
+ else
+ {
+
+ if( callpairlocalalign )
+ {
+ pairlocalalign( njob, nlenmax, name, seq, iscore, NULL, pac, pav );
+ arguments( tac, tav, NULL, NULL, NULL, NULL ); // anzen no tame
+ callpairlocalalign = 1; // wakarinikui.
+ if( fastathreshold < 0.0001 ) constraint = 0;
+ fprintf( stderr, "blosum %d / kimura 200\n", nblosum );
+ fprintf( stderr, "scoremtx=%d\n", scoremtx );
+ fprintf( stderr, "fastathreshold=%f\n", fastathreshold );
+ }
+ if( distout || outputhat23 )
+ {
+ reporterr( "\nwriting hat2 (1)\n" );
+ prep = fopen( "hat2", "w" );
+ WriteFloatHat2_pointer_halfmtx( prep, njob, name, iscore );
+ fclose( prep );
+ }
+ }
-#if 0
- readData( infp, name, nlen, seq );
-#else
- readData_pointer( infp, name, nlen, seq );
- fclose( infp );
-#endif
+ free( tav );
+ free( pav );
constants( njob, seq );
+
#if 0
fprintf( stderr, "params = %d, %d, %d\n", penalty, penalty_ex, offset );
#endif
WriteOptions( trap_g );
+ if( distout && !treeout && noalign ) // 2016Jul31. Free ha mada fukanzen.
+ {
+ writeData_pointer( prep_g, njob, name, nlen, seq );
+ fprintf( stderr, "\n" );
+ SHOWVERSION;
+ goto chudan;
+ }
+
+
+
+
c = seqcheck( seq );
if( c )
{
if( treein )
{
+ int dumx, dumy;
+ double dumz;
+ treein = check_guidetreefile( &dumx, &dumy, &dumz );
+ if( treein == 'C' )
+ {
+ compacttree = 2;
+ treein = 0;
+ use_fft = 0; // kankeinai?
+ }
+
+ // else treein = 1 no mama
+ }
+
+ reporterr( "treein = %d\n", treein );
+ reporterr( "compacttree = %d\n", compacttree );
+
+ if( nadd && keeplength )
+ {
+ originalgaps = (char *)calloc( nlenmax+1, sizeof( char) );
+ recordoriginalgaps( originalgaps, njob-nadd, seq );
+
+ if( mapout )
+ {
+ addbk = (char **)calloc( nadd+1, sizeof( char * ) );
+ for( i=0; i<nadd; i++ )
+ {
+ ien = strlen( seq[njob-nadd+i] );
+ addbk[i] = (char *)calloc( ien + 1, sizeof( char ) );
+ gappick0( addbk[i], seq[njob-nadd+i] );
+ }
+ addbk[nadd] = NULL;
+ }
+ else
+ addbk = NULL;
+ }
+ else
+ {
+ originalgaps = NULL;
+ addbk = NULL;
+ }
+
+ if( treein )
+ {
#if 0
if( nkozo )
{
exit( 1 );
}
#endif
- fprintf( stderr, "Loading a tree ... " );
- loadtree( njob, topol, len, name, nlen, dep );
+ loadtree( njob, topol, len, name, nlen, dep, treeout );
+// loadtop( njob, topol, len, name, NULL, dep ); // 2015/Jan/13, not yet checked
fprintf( stderr, "\ndone.\n\n" );
}
else
{
- if( tbutree == 0 )
+ if( tbutree == 0 && compacttree ) // compacttree no toki ha treein ha 0 de uwagaki sarete iru.
+ {
+ iscore = NULL;// tsukawanai
+ reporterr( "Making a compact tree from msa, step 1.. \n" );
+ skiptable = AllocateIntMtx( njob, 0 );
+ makeskiptable( njob, skiptable, seq ); // allocate suru.
+ mindistfrom = (int *)calloc( njob, sizeof( int ) );
+ mindist = (double *)calloc( njob, sizeof( double ) );
+ partmtx = preparepartmtx( njob );
+
+ for( i=0; i<njob; i++ ) // disttbfast deha kokoniha nakatta.
+ {
+// selfscore[i] = naivepairscore11( seq[i], seq[i], penalty_dist );
+ selfscore[i] = (int)naivepairscorefast( seq[i], seq[i], skiptable[i], skiptable[i], penalty_dist );
+// fprintf( stderr, "penalty = %d\n", penalty );
+// fprintf( stderr, "penalty_dist = %d\n", penalty_dist );
+ }
+#ifdef enablemultithread
+ if( nthread > 0 )
+ {
+ msacompactdistmtxthread_arg_t *targ;
+ int jobpos;
+ pthread_t *handle;
+ pthread_mutex_t mutex;
+ double **mindistthread;
+ int **mindistfromthread;
+
+ mindistthread = AllocateDoubleMtx( nthread, njob );
+ mindistfromthread = AllocateIntMtx( nthread, njob );
+ targ = calloc( nthread, sizeof( msacompactdistmtxthread_arg_t ) );
+ handle = calloc( nthread, sizeof( pthread_t ) );
+ pthread_mutex_init( &mutex, NULL );
+ jobpos = 0;
+
+ for( i=0; i<nthread; i++ )
+ {
+ for( j=0; j<njob; j++ )
+ {
+ mindistthread[i][j] = 999.9;
+ mindistfromthread[i][j] = -1;
+ }
+ targ[i].thread_no = i;
+ targ[i].njob = njob;
+ targ[i].selfscore = selfscore;
+ targ[i].partmtx = partmtx;
+ targ[i].seq = seq;
+ targ[i].skiptable = skiptable;
+ targ[i].jobpospt = &jobpos;
+ targ[i].mindistfrom = mindistfromthread[i];
+ targ[i].mindist = mindistthread[i];
+ targ[i].mutex = &mutex;
+
+ pthread_create( handle+i, NULL, msacompactdisthalfmtxthread, (void *)(targ+i) );
+ }
+
+ for( i=0; i<nthread; i++ ) pthread_join( handle[i], NULL );
+ pthread_mutex_destroy( &mutex );
+
+ for( i=0; i<njob; i++ )
+ {
+ mindist[i] = 999.9;
+ mindistfrom[i] = -1;
+ for( j=0; j<nthread; j++ )
+ {
+ if( mindistthread[j][i] < mindist[i] )
+ {
+ mindist[i] = mindistthread[j][i];
+ mindistfrom[i] = mindistfromthread[j][i];
+ }
+ }
+ }
+ for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
+
+ free( handle );
+ free( targ );
+ FreeDoubleMtx( mindistthread );
+ FreeIntMtx( mindistfromthread );
+ }
+ else
+#endif
+ {
+ msacompactdistmtxthread_arg_t *targ;
+ int jobpos;
+ jobpos = 0;
+ targ = calloc( 1, sizeof( msacompactdistmtxthread_arg_t ) );
+
+ {
+ for( j=0; j<njob; j++ )
+ {
+ mindist[j] = 999.9;
+ mindistfrom[j] = -1;
+ }
+ targ[0].thread_no = 0;
+ targ[0].njob = njob;
+ targ[0].selfscore = selfscore;
+ targ[0].partmtx = partmtx;
+ targ[0].seq = seq;
+ targ[0].skiptable = skiptable;
+ targ[0].jobpospt = &jobpos;
+ targ[0].mindistfrom = mindistfrom;
+ targ[0].mindist = mindist;
+
+ msacompactdisthalfmtxthread( targ );
+// msacompactdistmtxthread( targ );
+ }
+ free( targ );
+ for( i=0; i<njob; i++ ) mindist[i] -= preferenceval( i, mindistfrom[i], njob ); // for debug
+ }
+// free( selfscore ); selfscore = NULL; // mada tsukau
+// FreeCharMtx( bseq ); bseq = NULL; // mada tsukau
+// if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL;
+
+// for( i=0; i<njob; i++ ) printf( "mindist[%d] = %f\n", i, mindist[i] );
+// exit( 1 );
+ reporterr( "\rdone. \n" );
+ }
+ else if( tbutree == 0 && compacttree == 0 )
{
+ reporterr( "Making a distance matrix from msa .. \n" );
+// reporterr( "Bug. This function should not be used in versions >=7.2. Please email kazutaka.katoh@aist.go.jp\n" );
+// fflush( stderr );
+// exit( 1 );
+ iscore = AllocateFloatHalfMtx( njob ); // tbutree == 0 no baai ha allocate sareteinainode
+
for( i=1; i<njob; i++ )
{
if( nlen[i] != nlen[0] )
}
}
- fprintf( stderr, "Making a distance matrix .. \n" );
- fflush( stderr );
+ skiptable = AllocateIntMtx( njob, 0 );
+ makeskiptable( njob, skiptable, seq ); // allocate suru.
ien = njob-1;
for( i=0; i<njob; i++ )
{
- selfscore[i] = naivepairscore11( seq[i], seq[i], penalty );
+// selfscore[i] = naivepairscore11( seq[i], seq[i], penalty_dist );
+ selfscore[i] = (int)naivepairscorefast( seq[i], seq[i], skiptable[i], skiptable[i], penalty_dist );
+// fprintf( stderr, "penalty = %d\n", penalty );
+// fprintf( stderr, "penalty_dist = %d\n", penalty_dist );
}
#ifdef enablemultithread
if( nthread > 0 )
targ[i].selfscore = selfscore;
targ[i].iscore = iscore;
targ[i].seq = seq;
+ targ[i].skiptable = skiptable;
targ[i].jobpospt = &jobpos;
targ[i].mutex = &mutex;
ssj = selfscore[j];
bunbo = MIN( ssi, ssj );
if( bunbo == 0.0 )
- iscore[i][j-i] = 1.0;
+ iscore[i][j-i] = 2.0; // 2013/Oct/17 2bai
else
-// iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty ) / MIN( selfscore[i], selfscore[j] );
- iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty ) / bunbo;
+// iscore[i][j-i] = 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / MIN( selfscore[i], selfscore[j] );
+// iscore[i][j-i] = ( 1.0 - naivepairscore11( seq[i], seq[j], penalty_dist ) / bunbo ) * 2.0; // 2013/Oct/17 2bai
+ iscore[i][j-i] = ( 1.0 - naivepairscorefast( seq[i], seq[j], skiptable[i], skiptable[j], penalty_dist ) / bunbo ) * 2.0; // 2014/Aug/15 fast
+ if( iscore[i][j-i] > 10 ) iscore[i][j-i] = 10.0; // 2015/Mar/17
+//exit( 1 );
#if 0
fprintf( stderr, "### ssj = %f\n", ssj );
fprintf( stderr, "### selfscore[i] = %f\n", selfscore[i] );
fprintf( stderr, "### selfscore[j] = %f\n", selfscore[j] );
- fprintf( stderr, "### rawscore = %f\n", naivepairscore11( seq[i], seq[j], penalty ) );
+ fprintf( stderr, "### rawscore = %f\n", naivepairscore11( seq[i], seq[j], penalty_dist ) );
#endif
}
}
}
- fprintf( stderr, "\ndone.\n\n" );
- fflush( stderr );
+// fprintf( stderr, "\ndone.\n\n" );
+ FreeIntMtx( skiptable );
+// fflush( stderr );
+ reporterr( "\rdone. \n" );
+
}
else
{
- fprintf( stderr, "Loading 'hat2' ... " );
- prep = fopen( "hat2", "r" );
- if( prep == NULL ) ErrorExit( "Make hat2." );
- readhat2_floathalf_pointer( prep, njob, name, iscore );
- fclose( prep );
- fprintf( stderr, "done.\n" );
- }
-#if 1
- if( distout )
- {
- hat2p = fopen( "hat2", "w" );
- WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore );
- fclose( hat2p );
- }
+ if( callpairlocalalign )
+ {
+ if( multidist )
+ {
+ reporterr( "Bug in v7.290. Please email kazutaka.katoh@aist.go.jp\n" );
+ exit( 1 );
+ }
+#if 0
+ prep = fopen( "hat2", "w" );
+ if( !prep ) ErrorExit( "Cannot open hat2." );
+ WriteFloatHat2_pointer_halfmtx( prep, njob, name, iscore ); // jissiha double
+ fclose( prep );
#endif
+ }
+ else
+ {
+ if( multidist )
+ {
+ fprintf( stderr, "Loading 'hat2n' (aligned sequences - new sequences) ... " );
+ prep = fopen( "hat2n", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat2." );
+ readhat2_doublehalf_pointer( prep, njob, name, iscore );
+ fclose( prep );
+ fprintf( stderr, "done.\n" );
+
+ fprintf( stderr, "Loading 'hat2i' (aligned sequences) ... " );
+ prep = fopen( "hat2i", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat2i." );
+ readhat2_doublehalf_pointer( prep, njob-nadd, name, iscore );
+ fclose( prep );
+ fprintf( stderr, "done.\n" );
+ }
+ else
+ {
+ fprintf( stderr, "Loading 'hat2' ... " );
+ prep = fopen( "hat2", "r" );
+ if( prep == NULL ) ErrorExit( "Make hat2." );
+ readhat2_doublehalf_pointer( prep, njob, name, iscore );
+ fclose( prep );
+ fprintf( stderr, "done.\n" );
+ }
+
+ if( distout ) // callpairlocalalign == 1 no toki ha ue de shorizumi.
+ {
+ reporterr( "\nwriting hat2 (2)\n" );
+ hat2p = fopen( "hat2", "w" );
+ WriteFloatHat2_pointer_halfmtx( hat2p, njob, name, iscore );
+ fclose( hat2p );
+ }
+ }
+// for( i=0; i<njob-1; i++ ) for( j=i+1; j<njob; j++ ) printf( "dist %d-%d = %f\n", i, j, iscore[i][j-i] );
+ }
+
if( nkozo )
{
ien = njob-1;
}
}
- fprintf( stderr, "Constructing a UPGMA tree ... " );
+// fprintf( stderr, "Constructing a UPGMA tree ... " );
fflush( stderr );
if( topin )
{
- fprintf( stderr, "Loading a topology ... " );
- loadtop( njob, iscore, topol, len );
- fprintf( stderr, "\ndone.\n\n" );
+ fprintf( stderr, "--topin has been disabled\n" );
+ exit( 1 );
+// fprintf( stderr, "Loading a topology ... " );
+// loadtop( njob, iscore, topol, len );
+// fprintf( stderr, "\ndone.\n\n" );
+ }
+ else if( subalignment ) // merge error no tame
+ {
+ fprintf( stderr, "Constructing a UPGMA tree ... " );
+ fixed_supg_double_realloc_nobk_halfmtx_treeout_constrained( njob, iscore, topol, len, name, nlen, dep, nsubalignments, subtable, 1 );
}
- else if( treeout )
+ else if( tbutree == 0 && compacttree ) // tbutree != 0 no toki (aln->mtx) ha, 6merdistance -> disttbfast.c; dp distance -> muzukashii
{
- fixed_musclesupg_float_realloc_nobk_halfmtx_treeout( njob, iscore, topol, len, name, nlen, dep );
+ reporterr( "Constructing a tree ... " );
+ compacttree_memsaveselectable( njob, partmtx, mindistfrom, mindist, NULL, selfscore, seq, skiptable, topol, len, name, NULL, dep, treeout, compacttree, 1 );
+ if( mindistfrom ) free( mindistfrom ); mindistfrom = NULL;
+ if( mindist ) free( mindist );; mindist = NULL;
+// if( selfscore ) free( selfscore ); selfscore = NULL; // matomete free
+ if( skiptable) FreeIntMtx( skiptable ); skiptable = NULL; // nikaime dake
+ free( partmtx );
+ }
+ else if( treeout ) // merge error no tame
+ {
+ fprintf( stderr, "Constructing a UPGMA tree ... " );
+ fixed_musclesupg_double_realloc_nobk_halfmtx_treeout( njob, iscore, topol, len, name, nlen, dep, 1 );
}
else
{
- fixed_musclesupg_float_realloc_nobk_halfmtx( njob, iscore, topol, len, dep );
+ fprintf( stderr, "Constructing a UPGMA tree ... " );
+ fixed_musclesupg_double_realloc_nobk_halfmtx( njob, iscore, topol, len, dep, 1, 1 );
}
// else
// ErrorExit( "Incorrect tree\n" );
// for( i=0; i<nkozo-1; i++ )
// for( j=i+1; j<nkozo; j++ )
// fprintf( stderr, "iscore_kozo[%d][%d] =~ %f\n", i, j, iscore_kozo[i][j-i] );
- fixed_musclesupg_float_realloc_nobk_halfmtx( nkozo, iscore_kozo, topol_kozo, len_kozo, NULL );
+ fixed_musclesupg_double_realloc_nobk_halfmtx( nkozo, iscore_kozo, topol_kozo, len_kozo, NULL, 1, 1 );
}
fprintf( stderr, "\ndone.\n\n" );
fflush( stderr );
}
+
orderfp = fopen( "order", "w" );
if( !orderfp )
{
writeData_pointer( prep_g, njob, name, nlen, seq );
fprintf( stderr, "\n" );
SHOWVERSION;
- return( 0 );
+ goto chudan; // 2016Jul31
}
// countnode( njob, topol, node0 );
#if 0
utree = 0; counteff( njob, topol, len, eff ); utree = 1;
#else
- counteff_simple_float( njob, topol, len, eff );
-
+ counteff_simple_double_nostatic( njob, topol, len, eff );
+ for( i=njob-nadd; i<njob; i++ ) eff[i] /= (double)100;
+#if 0
+ fprintf( stderr, "###### WEIGHT = \n" );
+ for( i=0; i<njob; i++ )
+ {
+ fprintf( stderr, "w[%d] = %f\n", i, eff[i] );
+ }
+ exit( 1 );
+#endif
if( nkozo )
{
-// counteff_simple_float( nkozo, topol_kozo, len_kozo, eff_kozo ); // single weight nanode iranai
+// counteff_simple_double( nkozo, topol_kozo, len_kozo, eff_kozo ); // single weight nanode iranai
for( i=0,j=0; i<njob; i++ )
{
if( kozoarivec[i] )
}
}
- FreeFloatHalfMtx( iscore, njob );
+ if( iscore ) FreeFloatHalfMtx( iscore, njob ); iscore = NULL;
FreeFloatMtx( len );
alloclen = nlenmax*2+1; //chuui!
bseq = AllocateCharMtx( njob, alloclen );
+
if( nadd )
{
alignmentlength = strlen( seq[0] );
mergeoralign[i] = '1';
foundthebranch = 1;
}
- else if( samemember( topol[i][1], addmem ) )
+ else if( samemember( topol[i][1], addmem ) ) // samemembern ni henkou kanou
{
mergeoralign[i] = '2';
foundthebranch = 1;
addmem[1] = -1;
for( i=0; i<njob-1; i++ )
{
- if( samemember( topol[i][0], addmem ) ) // arieru
+ if( samemembern( topol[i][0], addmem, 1 ) ) // arieru
{
// fprintf( stderr, "HIT!\n" );
if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
else mergeoralign[i] = '1';
}
- else if( samemember( topol[i][1], addmem ) )
+ else if( samemembern( topol[i][1], addmem, 1 ) )
{
// fprintf( stderr, "HIT!\n" );
if( mergeoralign[i] != 'n' ) mergeoralign[i] = 'w';
commongappick( njob-nadd, seq );
for( i=0; i<njob-nadd; i++ ) strcpy( bseq[i], seq[i] );
}
+//--------------- kokokara ----
+ else if( subalignment )
+ {
+ for( i=0; i<njob-1; i++ ) mergeoralign[i] = 'a';
+ for( i=0; i<nsubalignments; i++ )
+ {
+ fprintf( stderr, "Checking subalignment %d:\n", i+1 );
+ alignmentlength = strlen( seq[subtable[i][0]] );
+// for( j=0; subtable[i][j]!=-1; j++ )
+// fprintf( stderr, " %d. %-30.30s\n", subtable[i][j]+1, name[subtable[i][j]]+1 );
+ for( j=0; subtable[i][j]!=-1; j++ )
+ {
+ if( subtable[i][j] >= njob )
+ {
+ fprintf( stderr, "No such sequence, %d.\n", subtable[i][j]+1 );
+ exit( 1 );
+ }
+ if( alignmentlength != strlen( seq[subtable[i][j]] ) )
+ {
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "###############################################################################\n" );
+ fprintf( stderr, "# ERROR!\n" );
+ fprintf( stderr, "# Subalignment %d must be aligned.\n", i+1 );
+ fprintf( stderr, "# Please check the alignment lengths of following sequences.\n" );
+ fprintf( stderr, "#\n" );
+ fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][0]+1, name[subtable[i][0]]+1, alignmentlength );
+ fprintf( stderr, "# %d. %-10.10s -> %d letters (including gaps)\n", subtable[i][j]+1, name[subtable[i][j]]+1, (int)strlen( seq[subtable[i][j]] ) );
+ fprintf( stderr, "#\n" );
+ fprintf( stderr, "# See http://mafft.cbrc.jp/alignment/software/merge.html for details.\n" );
+ if( subalignmentoffset )
+ {
+ fprintf( stderr, "#\n" );
+ fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
+ fprintf( stderr, "# In this case, the rule of numbering is:\n" );
+ fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
+ fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
+ }
+ fprintf( stderr, "###############################################################################\n" );
+ fprintf( stderr, "\n" );
+ exit( 1 );
+ }
+ insubtable[subtable[i][j]] = 1;
+ }
+ for( j=0; j<njob-1; j++ )
+ {
+ if( includemember( topol[j][0], subtable[i] ) && includemember( topol[j][1], subtable[i] ) )
+ {
+ mergeoralign[j] = 'n';
+ }
+ }
+ foundthebranch = 0;
+ for( j=0; j<njob-1; j++ )
+ {
+ if( samemember( topol[j][0], subtable[i] ) || samemember( topol[j][1], subtable[i] ) )
+ {
+ foundthebranch = 1;
+ fprintf( stderr, " -> OK\n" );
+ break;
+ }
+ }
+ if( !foundthebranch )
+ {
+ system( "cp infile.tree GuideTree" ); // tekitou
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "###############################################################################\n" );
+ fprintf( stderr, "# ERROR!\n" );
+ fprintf( stderr, "# Subalignment %d does not form a monophyletic cluster\n", i+1 );
+ fprintf( stderr, "# in the guide tree ('GuideTree' in this directory) internally computed.\n" );
+ fprintf( stderr, "# If you really want to use this subalignment, pelase give a tree with --treein \n" );
+ fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/treein.html\n" );
+ fprintf( stderr, "# http://mafft.cbrc.jp/alignment/software/merge.html\n" );
+ if( subalignmentoffset )
+ {
+ fprintf( stderr, "#\n" );
+ fprintf( stderr, "# You specified seed alignment(s) consisting of %d sequences.\n", subalignmentoffset );
+ fprintf( stderr, "# In this case, the rule of numbering is:\n" );
+ fprintf( stderr, "# The aligned seed sequences are numbered as 1 .. %d\n", subalignmentoffset );
+ fprintf( stderr, "# The input sequences to be aligned are numbered as %d .. %d\n", subalignmentoffset+1, subalignmentoffset+njob );
+ }
+ fprintf( stderr, "############################################################################### \n" );
+ fprintf( stderr, "\n" );
+ exit( 1 );
+ }
+// commongappick( seq[subtable[i]], subalignment[i] ); // irukamo
+ }
+#if 0
+ for( i=0; i<njob-1; i++ )
+ {
+ fprintf( stderr, "STEP %d\n", i+1 );
+ fprintf( stderr, "group1 = " );
+ for( j=0; topol[i][0][j] != -1; j++ )
+ fprintf( stderr, "%d ", topol[i][0][j]+1 );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "group2 = " );
+ for( j=0; topol[i][1][j] != -1; j++ )
+ fprintf( stderr, "%d ", topol[i][1][j]+1 );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "%d -> %c\n\n", i, mergeoralign[i] );
+ }
+#endif
+
+ for( i=0; i<njob; i++ )
+ {
+ if( insubtable[i] ) strcpy( bseq[i], seq[i] );
+ else gappick0( bseq[i], seq[i] );
+ }
+
+ for( i=0; i<nsubalignments; i++ )
+ {
+ for( j=0; subtable[i][j]!=-1; j++ ) subalnpt[i][j] = bseq[subtable[i][j]];
+ if( !preservegaps[i] ) commongappick( j, subalnpt[i] );
+ }
+
+ FreeIntMtx( subtable );
+ free( insubtable );
+ for( i=0; i<nsubalignments; i++ ) free( subalnpt[i] );
+ free( subalnpt );
+ free( preservegaps );
+ }
+//--------------- kokomade ----
else
{
for( i=0; i<njob; i++ ) gappick0( bseq[i], seq[i] );
for( i=0; i<njob; i++ )
{
nogaplen = strlen( bseq[i] );
- singlerna[i] = (RNApair **)calloc( nogaplen, sizeof( RNApair * ) );
+ singlerna[i] = (RNApair **)calloc( nogaplen+1, sizeof( RNApair * ) );
for( j=0; j<nogaplen; j++ )
{
singlerna[i][j] = (RNApair *)calloc( 1, sizeof( RNApair ) );
singlerna[i][j][0].bestpos = -1;
singlerna[i][j][0].bestscore = -1.0;
}
+ singlerna[i][nogaplen] = NULL;
+// fprintf( stderr, "### reading bpp %d ...\n", i );
readmccaskill( prep, singlerna[i], nogaplen );
}
fclose( prep );
for( i=0; i<njob; i++ ) fftlog[i] = 1;
if( constraint )
- calcimportance( njob, eff, bseq, localhomtable );
+ {
+ if( specifictarget )
+ calcimportance_target( njob, ntarget, eff, bseq, localhomtable, targetmap, targetmapr );
+// dontcalcimportance_target( njob, eff, bseq, localhomtable, ntarget ); // CHUUIII!!!!!
+ else
+ calcimportance_half( njob, eff, bseq, localhomtable );
+ }
+// dontcalcimportance( njob, eff, bseq, localhomtable ); // CHUUUUIIII!!!
for( i=0; i<nthread_yoyu; i++ )
{
targ[i].singlerna = singlerna;
targ[i].effarr_kozo = eff_kozo_mapped;
targ[i].fftlog = fftlog;
+ targ[i].mergeoralign = mergeoralign;
+ targ[i].targetmap = targetmap;
targ[i].mutex = &mutex;
targ[i].treecond = &treecond;
free( handle );
free( targ );
free( fftlog );
+// free( topol[njob-1][0] );
+// free( topol[njob-1][1] );
+ free( topol[njob-1] );
+ free( topol );
}
else
#endif
- treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, eff, &alloclen, localhomtable, singlerna, eff_kozo_mapped );
+
+ treebase( nlen, bseq, nadd, mergeoralign, mseq1, mseq2, topol, dep, eff, &alloclen, localhomtable, singlerna, eff_kozo_mapped, targetmap, targetmapr, ntarget );
fprintf( stderr, "\ndone.\n" );
+
+
+ if( keeplength )
+ {
+
+ dlf = fopen( "_deletelist", "w" );
+ deletelist = (int **)calloc( nadd+1, sizeof( int * ) );
+ for( i=0; i<nadd; i++ )
+ {
+ deletelist[i] = calloc( 1, sizeof( int ) );
+ deletelist[i][0] = -1;
+ }
+ deletelist[nadd] = NULL;
+ ndeleted = deletenewinsertions_whole( njob-nadd, nadd, bseq, bseq+njob-nadd, deletelist );
+
+ for( i=0; i<nadd; i++ )
+ {
+ if( deletelist[i] )
+ for( j=0; deletelist[i][j]!=-1; j++ )
+ fprintf( dlf, "%d %d\n", njob-nadd+i, deletelist[i][j] ); // 0origin
+ }
+ fclose( dlf );
+
+ restoreoriginalgaps( njob, bseq, originalgaps );
+
+ if( mapout )
+ {
+ dlf = fopen( "_deletemap", "w" );
+ reconstructdeletemap( nadd, addbk, deletelist, bseq+njob-nadd, dlf, name+njob-nadd );
+ FreeCharMtx( addbk );
+ addbk = NULL;
+ fclose( dlf );
+ }
+
+ FreeIntMtx( deletelist );
+ deletelist = NULL;
+ }
+
+
+
if( scoreout )
{
unweightedspscore = plainscore( njob, bseq );
if( constraint )
{
LocalHom *tmppt1, *tmppt2;
- for( i=0; i<njob; i++)
+ for( i=0; i<njob; i++ )
{
- for( j=0; j<njob; j++)
+ for( j=0; j<njob; j++ )
{
tmppt1 = localhomtable[i]+j;
while( tmppt2 = tmppt1->next )
#endif
fprintf( trap_g, "done.\n" );
- fclose( trap_g );
+// fclose( trap_g );
free( mergeoralign );
+ freeconstants();
+
+
+
+ if( rnakozo && rnaprediction == 'm' )
+ {
+ if( singlerna ) // nen no tame
+ {
+ for( i=0; i<njob; i++ )
+ {
+ for( j=0; singlerna[i][j]!=NULL; j++ )
+ {
+ if( singlerna[i][j] ) free( singlerna[i][j] );
+ }
+ if( singlerna[i] ) free( singlerna[i] );
+ }
+ free( singlerna );
+ singlerna = NULL;
+ }
+ }
writeData_pointer( prep_g, njob, name, nlen, bseq );
#if 0
fprintf( stderr, "OSHIMAI\n" );
#endif
- if( constraint ) FreeLocalHomTable( localhomtable, njob );
+ if( constraint )
+ {
+ if( specifictarget )
+ FreeLocalHomTable_part( localhomtable, ntarget, njob );
+ else
+ FreeLocalHomTable_half( localhomtable, njob );
+ }
+ free( targetmap );
+ free( targetmapr );
+
+ if( spscoreout ) reporterr( "Unweighted sum-of-pairs score = %10.5f\n", sumofpairsscore( njob, bseq ) );
SHOWVERSION;
+ if( ndeleted > 0 )
+ {
+ reporterr( "\nTo keep the alignment length, %d letters were DELETED.\n", ndeleted );
+ if( mapout )
+ reporterr( "The deleted letters are shown in the (filename).map file.\n" );
+ else
+ reporterr( "To know the positions of deleted letters, rerun the same command with the --mapout option.\n" );
+ }
+
+
+ free( kozoarivec );
+ FreeCharMtx( seq );
+ FreeCharMtx( bseq );
+ free( mseq1 );
+ free( mseq2 );
+
+ FreeCharMtx( name );
+ free( nlen );
+ free( selfscore );
+
+// for( i=0; i<njob; i++ )
+// {
+// free( topol[i][0] );
+// free( topol[i][1] );
+// free( topol[i] );
+// }
+// free( topol );
+// free( len );
+// free( iscore );
+ free( eff );
+ free( dep );
+ closeFiles();
+ if( nadd ) free( addmem );
+
return( 0 );
+
+chudan:
+ if( seq ) FreeCharMtx( seq ); seq = NULL;
+ if( mseq1 ) free( mseq1 ); mseq1 = NULL;
+ if( mseq2 ) free( mseq2 ); mseq2 = NULL;
+
+ if( name ) FreeCharMtx( name ); name = NULL;
+ if( nlen ) free( nlen ); nlen = NULL;
+ if( selfscore ) free( selfscore ); selfscore = NULL;
+ if( mergeoralign ) free( mergeoralign ); mergeoralign = NULL;
+
+ if( localhomtable )
+ {
+ reporterr( "freeing localhomtable\n" );
+ if( specifictarget )
+ FreeLocalHomTable_part( localhomtable, ntarget, njob );
+ else
+ FreeLocalHomTable_half( localhomtable, njob );
+ }
+ localhomtable = NULL;
+ if( targetmap ) free( targetmap ); targetmap = NULL;
+ if( targetmapr ) free( targetmapr ); targetmapr = NULL;
+
+ if( kozoarivec ) free( kozoarivec ); kozoarivec = NULL;
+
+
+ if( topol ) FreeIntCub( topol ); topol = NULL;
+ if( len ) FreeFloatMtx( len ); len = NULL;
+ if( iscore ) FreeFloatHalfMtx( iscore, njob ); iscore = NULL;
+ if( eff ) free( eff ); eff = NULL;
+ if( dep ) free( dep ); dep = NULL;
+
+ freeconstants();
+ closeFiles();
+ FreeCommonIP();
+ return( 0 );
+
}
#include "mltaln.h"
#define DEBUG 0
+#define USEDISTONTREE 1
#if 0
void mdfymtx( char pair[njob][njob], int s1, double **partialmtx, double **mtx )
}
-float score_calc( char **seq, int s ) /* method 3 */
+double score_calc( char **seq, int s ) /* method 3 */
{
int i, j, k, c;
int len = strlen( seq[0] );
- float score;
+ double score;
int tmpscore;
char *mseq1, *mseq2;
score += (double)tmpscore / (double)c;
}
}
- score = (float)score / ( ( (double)s * ((double)s-1.0) ) / 2.0 );
+ score = (double)score / ( ( (double)s * ((double)s-1.0) ) / 2.0 );
fprintf( stderr, "score in score_calc = %f\n", score );
return( score );
}
-void cpmx_calc( char **seq, float **cpmx, double *eff, int lgth, int clus )
+void cpmx_calc( char **seq, double **cpmx, double *eff, int lgth, int clus )
{
int i, j, k;
double totaleff = 0.0;
for( i=0; i<clus; i++ ) totaleff += eff[i];
- for( i=0; i<26; i++ ) for( j=0; j<lgth; j++ ) cpmx[i][j] = 0.0;
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<lgth; j++ ) cpmx[i][j] = 0.0;
for( j=0; j<lgth; j++ ) for( k=0; k<clus; k++ )
- cpmx[(int)amino_n[(int)seq[k][j]]][j] += (float)eff[k] / totaleff;
+ cpmx[(int)amino_n[(unsigned char)seq[k][j]]][j] += (double)eff[k] / totaleff;
}
-void cpmx_calc_new_bk( char **seq, float **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0
+void cpmx_calc_new_bk( char **seq, double **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0
{
int i, j, k;
- float feff;
+ double feff;
- for( i=0; i<26; i++ ) for( j=0; j<lgth; j++ ) cpmx[i][j] = 0.0;
+ for( i=0; i<nalphabets; i++ ) for( j=0; j<lgth; j++ ) cpmx[i][j] = 0.0;
for( k=0; k<clus; k++ )
{
- feff = (float)eff[k];
+ feff = (double)eff[k];
for( j=0; j<lgth; j++ )
{
cpmx[(int)amino_n[(int)seq[k][j]]][j] += feff;
}
}
-void cpmx_calc_new( char **seq, float **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0
+void cpmx_calc_add( char **seq, double **cpmx, double *eff, int lgth, int clus ) // Incremental profile update for the last member only (newmem = clus-1): rescales cpmx by 1-eff[clus-1] and adds eff[clus-1] at that member's residue row in each column. Original note: sum of eff must be 1.0.
+{ // Only seq[clus-1] and eff[clus-1] are read; presumably cpmx already holds the profile of the first clus-1 members -- TODO confirm against callers.
+ double neweff, orieff;
+ int newmem, i, j;
+// newmem: index of the member being added; neweff: its weight; orieff: factor applied to the existing profile.
+ newmem = clus-1;
+ neweff = eff[clus-1];
+ orieff = 1.0-neweff;
+#if 1 // TESTING Feb/1/18:00
+ for( j=0; j<lgth; j++ ) // column-major pass: scale, then add, one column at a time
+ {
+ for( i=0;i<nalphabets; i++ ) cpmx[i][j] *= orieff;
+ cpmx[(unsigned char)amino_n[(unsigned char)seq[newmem][j]]][j] += neweff; // row = amino_n code of the new member's character in column j
+ }
+#else // possibly faster?
+ for( i=0;i<nalphabets; i++ ) // compiled out (#if 1 above): same arithmetic, scaling row by row first
+ {
+ for( j=0; j<lgth; j++ ) cpmx[i][j] *= orieff;
+ }
+ for( j=0; j<lgth; j++ ) cpmx[(unsigned char)amino_n[(unsigned char)seq[newmem][j]]][j] += neweff;
+#endif
+}
+
+void cpmx_calc_new( char **seq, double **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0
{
int i, j, k;
- float feff;
- float *cpmxpt, **cpmxptpt;
+ double feff;
+ double *cpmxpt, **cpmxptpt;
char *seqpt;
- j = 26;
+ j = nalphabets;
cpmxptpt = cpmx;
while( j-- )
{
}
for( k=0; k<clus; k++ )
{
- feff = (float)eff[k];
+ feff = (double)eff[k];
seqpt = seq[k];
// fprintf( stderr, "seqpt = %s, lgth=%d\n", seqpt, lgth );
for( j=0; j<lgth; j++ )
{
- cpmx[(int)amino_n[(int)*seqpt++]][j] += feff;
+ cpmx[(unsigned char)amino_n[(unsigned char)*seqpt++]][j] += feff;
}
}
}
-void MScpmx_calc_new( char **seq, float **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0
+void MScpmx_calc_new( char **seq, double **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0
{
int i, j, k;
- float feff;
- float **cpmxptpt, *cpmxpt;
+ double feff;
+ double **cpmxptpt, *cpmxpt;
char *seqpt;
j = lgth;
while( j-- )
{
cpmxpt = *cpmxptpt++;
- i = 26;
+ i = nalphabets;
while( i-- )
*cpmxpt++ = 0.0;
}
for( k=0; k<clus; k++ )
{
- feff = (float)eff[k];
+ feff = (double)eff[k];
seqpt = seq[k];
cpmxptpt = cpmx;
j = lgth;
while( j-- )
- (*cpmxptpt++)[(int)amino_n[(int)*seqpt++]] += feff;
+ (*cpmxptpt++)[(int)amino_n[(unsigned char)*seqpt++]] += feff;
}
#if 0
- for( j=0; j<lgth; j++ ) for( i=0; i<26; i++ ) cpmx[j][i] = 0.0;
+ for( j=0; j<lgth; j++ ) for( i=0; i<nalphabets; i++ ) cpmx[j][i] = 0.0;
for( k=0; k<clus; k++ )
{
- feff = (float)eff[k];
+ feff = (double)eff[k];
for( j=0; j<lgth; j++ )
cpmx[j][(int)amino_n[(int)seq[k][j]]] += feff;
}
#endif
}
-void cpmx_ribosum( char **seq, char **seqr, char *dir, float **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0
+void cpmx_ribosum( char **seq, char **seqr, char *dir, double **cpmx, double *eff, int lgth, int clus ) // summ eff must be 1.0
{
int i, j, k;
- float feff;
- float **cpmxptpt, *cpmxpt;
+ double feff;
+ double **cpmxptpt, *cpmxpt;
char *seqpt, *seqrpt, *dirpt;
int code, code1, code2;
}
for( k=0; k<clus; k++ )
{
- feff = (float)eff[k];
+ feff = (double)eff[k];
seqpt = seq[k];
seqrpt = seqr[k];
dirpt = dir;
else
code = code1;
#else
- code1 = amino_n[(int)*seqpt];
- code2 = amino_n[(int)*seqrpt];
+ code1 = amino_n[(unsigned char)*seqpt];
+ code2 = amino_n[(unsigned char)*seqrpt];
if( code1 > 3 )
{
code = 36;
}
}
-void makegrouprnait( RNApair ***group, RNApair ***all, char **pair, int s )
-{
- int k, m;
- for( m=s, k=0; m<njob; m++ )
- {
- if( pair[s][m] != 0 )
- {
- group[k++] = all[m];
- }
- }
-}
int fastconjuction_noweight( int *memlist, char **seq, char **aseq, double *peff, char *d )
{
}
-int fastconjuction_noname( int *memlist, char **seq, char **aseq, double *peff, double *eff, char *d )
+#if 0
+int fastconjuction_target( int *memlist, char **seq, char **aseq, double *peff, double *eff, char *d, double mineff, int *targetmap )
{
int m, k, dln;
char b[B];
double total;
+ int *memlistbk = memlist;
#if DEBUG
fprintf( stderr, "s = %d\n", s );
strcat( d, b );
#endif
aseq[k] = seq[m];
- peff[k] = eff[m];
+ if( eff[m] < mineff )
+ peff[k] = mineff;
+ else
+ peff[k] = eff[m];
+
total += peff[k];
}
#if 1
for( m=0; m<k; m++ )
{
-// fprintf( stderr, "peff[%d] = %f\n", m, peff[m] );
+// fprintf( stderr, "Apr17 peff[%d] = %f\n", m, peff[m] );
peff[m] /= total;
}
#endif
+
return( k );
}
+#endif
-int fastconjuction( int *memlist, char **seq, char **aseq, double *peff, double *eff, char name[M][B], char aname[M][B], char *d )
+
+int fastconjuction_noname( int *memlist, char **seq, char **aseq, double *peff, double *eff, char *d, double mineff )
{
int m, k, dln;
char b[B];
strcat( d, b );
#endif
aseq[k] = seq[m];
- peff[k] = eff[m];
+ if( eff[m] < mineff )
+ peff[k] = mineff;
+ else
+ peff[k] = eff[m];
+
total += peff[k];
-#if 0
- strcpy( aname[k], name[m] );
-#endif
}
#if 1
for( m=0; m<k; m++ )
+ {
+// fprintf( stderr, "Apr17 peff[%d] = %f\n", m, peff[m] );
peff[m] /= total;
+ }
#endif
return( k );
}
-
-int conjuctionfortbfast_kozo( double *tmptmptmp, char **pair, int s, char **seq, char **aseq, double *peff, double *eff, double *peff_kozo, double *eff_kozo, char *d )
+int fastconjuction( int *memlist, char **seq, char **aseq, double *peff, double *eff, char name[M][B], char aname[M][B], char *d )
{
- int m, k;
+ int m, k, dln;
char b[B];
double total;
- double total_kozo;
#if DEBUG
fprintf( stderr, "s = %d\n", s );
#endif
total = 0.0;
-// total_kozo = 0.0;
- total_kozo = *tmptmptmp; // masaka
d[0] = 0;
- for( m=s, k=0; m<njob; m++ )
+ dln = 0;
+ for( k=0; *memlist!=-1; memlist++, k++ )
{
- if( pair[s][m] != 0 )
- {
- sprintf( b, " %d", m+1 );
+ m = *memlist;
+ dln += sprintf( b, " %d", m+1 );
#if 1
- if( strlen( d ) < 100 ) strcat( d, b );
+ if( dln < 100 ) strcat( d, b );
#else
- strcat( d, b );
+ strcat( d, b );
#endif
- aseq[k] = seq[m];
- peff[k] = eff[m];
- peff_kozo[k] = eff_kozo[m];
- total += peff[k];
- total_kozo += peff_kozo[k];
+ aseq[k] = seq[m];
+ peff[k] = eff[m];
+ total += peff[k];
#if 0
strcpy( aname[k], name[m] );
#endif
- k++;
- }
}
#if 1
for( m=0; m<k; m++ )
peff[m] /= total;
- if( total_kozo > 0.0 )
- {
- for( m=0; m<k; m++ )
- {
- peff_kozo[m] /= total_kozo;
- if( peff_kozo[m] > 0.0 ) peff_kozo[m] += peff[m];
- }
- }
- else //iranai
- {
- for( m=0; m<k; m++ ) peff_kozo[m] = 0.0;
- }
#endif
-// fprintf( stderr, "\n\ndvtditr_total_kozo = %f\n\n", total_kozo );
- *tmptmptmp = total_kozo;
return( k );
}
-int conjuctionfortbfast( char **pair, int s, char **seq, char **aseq, double *peff, double *eff, char *d )
+
+
+int conjuctionfortbfast_old( char **pair, int s, char **seq, char **aseq, double *peff, double *eff, char *d )
{
int m, k;
char *b;
free( b );
return( k );
}
+
int conjuction( char **pair, int s, char **seq, char **aseq, double *peff, double *eff, char **name, char **aname, char *d )
{
int m, k;
return( k );
}
-void floatdelete( float **cpmx, int d, int len )
+void doubledelete( double **cpmx, int d, int len )
{
int i, j;
for( i=d; i<len-1; i++ )
{
- for( j=0; j<26; j++ )
+ for( j=0; j<nalphabets; j++ )
{
cpmx[j][i] = cpmx[j][i+1];
}
-
-
-#if 0
-void OneClusterAndTheOther( int locnjob, char pair[njob][njob], int *s1, int *s2, int ***topol, int step, int branch )
-#else
-void OneClusterAndTheOther( int locnjob, char **pair, int *s1, int *s2, int ***topol, int step, int branch )
-#endif
+void OneClusterAndTheOther_fast( int locnjob, int *memlist1, int *memlist2, int *s1, int *s2, char *pair, int ***topol, int step, int branch, double **smalldistmtx, double **distmtx, double *distontree )
{
- int i;
+ int i, k, j;
int r1;
-
- *s1 = topol[step][branch][0];
- for( i=0; (r1=topol[step][branch][i])>-1; i++ )
- pair[*s1][r1] = 1;
- for( i=0; i<locnjob; i++ )
+// char *pair;
+
+// pair = calloc( locnjob, sizeof( char ) );
+
+ for( i=0; i<locnjob; i++ ) pair[i] = 0;
+ for( i=0, k=0; (r1=topol[step][branch][i])>-1; i++ )
+ {
+ pair[r1] = 1;
+ memlist1[k++] = r1;
+ }
+ memlist1[k] = -1;
+
+ for( i=0, k=0; i<locnjob; i++ )
{
- if( !pair[*s1][i] )
+ if( !pair[i] )
{
- *s2 = i;
- break;
+ memlist2[k++] = i;
}
}
- for( i=*s2; i<locnjob; i++ )
- {
- if( !pair[*s1][i] )
- pair[*s2][i] = 1;
- }
+ memlist2[k] = -1;
+
+ *s1 = memlist1[0];
+ *s2 = memlist2[0];
+
+ if( smalldistmtx )
+ {
+ int im, jm;
+#if USEDISTONTREE
+ for( i=0; (im=memlist1[i])!=-1; i++ ) for( j=0; (jm=memlist2[j])!=-1; j++ )
+ smalldistmtx[i][j] = distontree[im] + distontree[jm];
+#else
+ for( i=0; (im=memlist1[i])!=-1; i++ ) for( j=0; (jm=memlist2[j])!=-1; j++ )
+ smalldistmtx[i][j] = distmtx[MIN(im,jm)][MAX(im,jm)];
+#endif
+
+#if 0
+ reporterr( "\n" );
+ for( i=0; (im=memlist1[i])!=-1; i++ ) for( j=0; (jm=memlist2[j])!=-1; j++ )
+ reporterr( "smalldistmtx[%d][%d] = %f\n", i, j, smalldistmtx[i][j] );
+
+
+ for( i=0; (im=memlist1[i])!=-1; i++ ) for( j=0; (jm=memlist2[j])!=-1; j++ )
+ smalldistmtx[i][j] = distmtx[MIN(im,jm)][MAX(im,jm)];
+
+ for( i=0; (im=memlist1[i])!=-1; i++ ) for( j=0; (jm=memlist2[j])!=-1; j++ )
+ reporterr( "old smalldistmtx[%d][%d] = %f\n", i, j, smalldistmtx[i][j] );
+if( im > 10 && jm > 10 ) exit( 1 );
+#endif
+ }
}
+
void makeEffMtx( int nseq, double **mtx, double *vec )
{
}
return( 0 );
}
-int msshrinklocalhom( char **pair, int s1, int s2, LocalHom **localhom, LocalHom ***localhomshrink )
+int msshrinklocalhom_fast_target( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink, char *swaplist, int *targetmap )
{
- int m1, k1, n1, m2, k2, n2;
+ int m1, k1, m2, k2, t1, i2;
- for( m1=s1, k1=0; m1<njob; m1++ )
+ for( k1=0; (m1=memlist1[k1])!=-1; k1++ )
{
- if( pair[s1][m1] != 0 )
- {
- for( m2=s2, k2=0; m2<njob; m2++ )
+ if( targetmap[m1] == -1 )
+ {
+ swaplist[k1] = 1;
+// swaplist[k1] = 0; // DAME!!!
+ for( k2=0; (m2=memlist2[k2])!=-1; k2++ )
{
- if( pair[s2][m2] != 0 )
+ if( targetmap[m2] == -1 )
{
- n1 = MIN(m1,m2); n2=MAX(m1,m2);
- if( localhom[m1][m2].opt == -1 )
- localhomshrink[k1][k2] = NULL;
- else
- localhomshrink[k1][k2] = localhom[n1]+n2;
- k2++;
+ localhomshrink[k1][k2] = NULL;
+ continue;
}
+
+ t1 = targetmap[m2]; // start1 <-> start2, end1 <-> end2
+ i2 = m1;
+
+ if( localhom[t1][i2].opt == -1 )
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[t1]+i2;
+ }
+ }
+ else
+ {
+ swaplist[k1] = 0;
+ for( k2=0; (m2=memlist2[k2])!=-1; k2++ )
+ {
+ t1 = targetmap[m1];
+ i2 = m2;
+
+ if( localhom[t1][i2].opt == -1 )
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[t1]+i2;
}
- k1++;
+ }
+ }
+ return( 0 );
+}
+
+int msshrinklocalhom_fast_half( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink ) // Fills localhomshrink[k1][k2] for every pair (memlist1[k1], memlist2[k2]) from a table indexed as [smaller][larger-smaller]; always returns 0.
+{ // memlist1 and memlist2 are read until a -1 terminator.
+ int m1, k1, m2, k2;
+// m1/m2: member ids taken from the lists; k1/k2: positions in the lists and indices into localhomshrink.
+ for( k1=0; (m1=memlist1[k1])!=-1; k1++ )
+ {
+ for( k2=0; (m2=memlist2[k2])!=-1; k2++ )
+ {
+ if( m1 < m2 ) // row is the smaller id, column is the offset between the two ids
+ {
+ if( localhom[m1][m2-m1].opt == -1 ) // opt == -1 is stored as NULL in the shrunk table
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[m1]+m2-m1;
+ }
+ else // m1 >= m2: same lookup with the roles of the two ids exchanged
+ {
+ if( localhom[m2][m1-m2].opt == -1 )
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[m2]+m1-m2;
+ }
+ }
+ }
+ return( 0 );
+}
+
+int msshrinklocalhom_fast( int *memlist1, int *memlist2, LocalHom **localhom, LocalHom ***localhomshrink ) // Fills localhomshrink[k1][k2] with a pointer to localhom[m1][m2] for every pair drawn from the two -1-terminated member lists; always returns 0.
+{ // Unlike the _half variant, localhom is indexed directly as [m1][m2] with no min/max reordering.
+ int m1, k1, m2, k2;
+// m1/m2: member ids taken from the lists; k1/k2: positions in the lists and indices into localhomshrink.
+ for( k1=0; (m1=memlist1[k1])!=-1; k1++ )
+ {
+ for( k2=0; (m2=memlist2[k2])!=-1; k2++ )
+ {
+ if( localhom[m1][m2].opt == -1 ) // opt == -1 is stored as NULL in the shrunk table
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[m1]+m2;
+ }
+ }
+ return( 0 );
+}
+int fastshrinklocalhom_one( int *mem1, int *mem2, int norg, LocalHom **localhom, LocalHom ***localhomshrink )
+{
+ int k1, k2;
+ int *intpt1, *intpt2;
+
+
+ for( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ )
+ {
+ for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ )
+ {
+ if( *intpt2 != norg )
+ {
+ fprintf( stderr, "ERROR! *intpt2 = %d\n", *intpt2 );
+ exit( 1 );
+ }
+ if( localhom[*intpt1][0].opt == -1 )
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[*intpt1];
}
}
return( 0 );
localhomshrink[k1][k2] = NULL;
else
localhomshrink[k1][k2] = localhom[*intpt1]+*intpt2;
+
+// if( localhomshrink[k1][k2] != NULL )
+// printf( "ori localhomshrink[%d][%d].opt = %f\n", k1, k2, localhomshrink[k1][k2]->opt );
+ }
+ }
+ return( 0 );
+}
+
+
+int fastshrinklocalhom_half( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink ) // Fills localhomshrink[k1][k2] for every pair (mem1[k1], mem2[k2]) from a table indexed as [smaller][larger-smaller]; always returns 0.
+{ // mem1 and mem2 are walked by pointer until a -1 terminator.
+ int k1, k2;
+ int *intpt1, *intpt2;
+// intpt1/intpt2: cursors into mem1/mem2; k1/k2: the matching indices into localhomshrink.
+// The inner cursor restarts at mem2 for every element of mem1.
+ for( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ )
+ {
+ for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ )
+ {
+ if( *intpt1 < *intpt2 ) // row is the smaller id, column is the offset between the two ids
+ {
+ if( localhom[*intpt1][*intpt2-*intpt1].opt == -1 ) // opt == -1 is stored as NULL in the shrunk table
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[*intpt1]+*intpt2-*intpt1;
+ }
+ else // *intpt1 >= *intpt2: same lookup with the roles of the two ids exchanged
+ {
+ if( localhom[*intpt2][*intpt1-*intpt2].opt == -1 )
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[*intpt2]+*intpt1-*intpt2;
+ }
+// Disabled debug print of the selected entry follows.
+// if( localhomshrink[k1][k2] != NULL )
+// printf( "ori localhomshrink[%d][%d].opt = %f, .importance=%f\n", k1, k2, localhomshrink[k1][k2]->opt, localhomshrink[k1][k2]->importance );
+ }
+ }
+ return( 0 );
+}
+
+
+int fastshrinklocalhom_target( int *mem1, int *mem2, LocalHom **localhom, LocalHom ***localhomshrink, char *swaplist, int *targetmap )
+{
+ int k1, k2;
+ int *intpt1, *intpt2;
+ int t1, i2;
+
+
+ for( intpt1=mem1, k1=0; *intpt1!=-1; intpt1++, k1++ )
+ {
+ if( targetmap[*intpt1] == -1 )
+ {
+ swaplist[k1] = 1;
+// swaplist[k1] = 0; // DAME!!!
+ for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ )
+ {
+ if( targetmap[*intpt2] == -1 )
+ {
+ localhomshrink[k1][k2] = NULL;
+ continue;
+ }
+
+ t1 = targetmap[*intpt2]; // end1<->end2, start1<->start2
+ i2 = *intpt1;
+
+ if( localhom[t1][i2].opt == -1 )
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[t1]+i2;
+
+// if( localhomshrink[k1][k2] != NULL )
+// printf( "localhomshrink[%d][%d].opt = %f\n", k1, k2, localhomshrink[k1][k2]->opt );
+// else
+// printf( "localhomshrink[%d][%d] = NULL\n", k1, k2 );
+ }
+ }
+ else
+ {
+ swaplist[k1] = 0;
+ for( intpt2=mem2, k2=0; *intpt2!=-1; intpt2++, k2++ )
+ {
+ t1 = targetmap[*intpt1];
+ i2 = *intpt2;
+
+ if( localhom[t1][i2].opt == -1 )
+ localhomshrink[k1][k2] = NULL;
+ else
+ localhomshrink[k1][k2] = localhom[t1]+i2;
+
+// if( localhomshrink[k1][k2] != NULL )
+// printf( "localhomshrink[%d][%d].opt = %f\n", k1, k2, localhomshrink[k1][k2]->opt );
+// else
+// printf( "localhomshrink[%d][%d] = NULL\n", k1, k2 );
+ }
}
}
return( 0 );
#include "mltaln.h"
+#define FULLSCORE 0
+
#define DEBUG 0
#define RECORD 0
static int nwa;
+
#ifdef enablemultithread
typedef struct _threadarg
{
int maxiter;
int nkozo;
int *subgenerationpt;
- float *basegainpt;
- float *gainlist;
- float *tscorelist;
+ double *basegainpt;
+ double *gainlist;
+ double *tscorelist;
int *generationofinput;
char *kozoarivec;
char **mastercopy;
int alloclen;
Node *stopol;
int ***topol;
-// double **len;
- float **tscorehistory_detail;
+ double **len;
+ double **tscorehistory_detail;
int *finishpt;
+ int **skipthisbranch;
+ double **distmtx;
+ int ntarget;
+ int *targetmap;
pthread_mutex_t *mutex;
pthread_cond_t *collection_end;
pthread_cond_t *collection_start;
}
#endif
+
+static void makescoringmatrices( double ***matrices, double **originalmtx ) // Builds one scoring matrix per distance class: matrices[c] is produced by makedynamicmtx() from originalmtx for c = 0 .. maxdistclass-1.
+{ // maxdistclass and ndistclass are not parameters here; they are names defined outside this function.
+ int c;
+ double rep;
+ for( c=0; c<maxdistclass; c++ )
+ {
+ rep = (double) 2 * c / ndistclass; // rep = 2c/ndistclass, computed in double; original note gives the range as 0..2
+// fprintf( stderr, "rep = %f\n", rep );
+ makedynamicmtx( matrices[c], originalmtx, rep * 0.5 ); // rep is halved to 0..1 to match UPGMA (translated from the original romaji note)
+// fprintf( stderr, "c=%d, score for %c-%c = %f\n", c, 'W', 'W', matrices[c][amino_n['W']][amino_n['W']] );
+ }
+}
+
+static void classifypairs( int n1, double **eff1s, double *eff1, int n2, double **eff2s, double *eff2, double **smalldistmtx, int **matnum, int maxdistclass ) // Assigns each pair (i<n1, j<n2) a distance class from smalldistmtx[i][j], records it in matnum[i][j], and copies the members' weights into the per-class arrays eff1s[c] / eff2s[c].
+{ // maxdistclass is a parameter here; ndistclass is a name defined outside this function.
+ int i, j, c;
+ for( c=0; c<maxdistclass; c++ ) // clear every per-class weight before classification
+ {
+ for( i=0; i<n1; i++ ) eff1s[c][i] = 0.0;
+ for( j=0; j<n2; j++ ) eff2s[c][j] = 0.0;
+ }
+// Two variants follow; only the #else branch is compiled.
+#if 0 // disabled variant: counts the pairs per class, then divides each weight by that count
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ {
+ c = (int)( smalldistmtx[i][j] / 2.0 * ndistclass ); // dist:0..2
+// if( c >= ndistclass ) c = ndistclass-1;
+ if( c >= maxdistclass ) c = maxdistclass-1;
+ fprintf( stderr, "pair %d-%d (%f), dist=%f -> c=%d\n", i, j, eff1[i] * eff2[j], smalldistmtx[i][j], c );
+ eff1s[c][i] += 1.0;
+ eff2s[c][j] += 1.0;
+ matnum[i][j] = c;
+ }
+ for( c=0; c<maxdistclass; c++ ) for( i=0; i<n1; i++ ) if(eff1s[c][i]) eff1s[c][i] = eff1[i]/eff1s[c][i];
+ for( c=0; c<maxdistclass; c++ ) for( i=0; i<n2; i++ ) if(eff2s[c][i]) eff2s[c][i] = eff2[i]/eff2s[c][i];
+#else
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ )
+ {
+ c = (int)( smalldistmtx[i][j] / 2.0 * ndistclass ); // dist:0..2
+// if( c >= ndistclass ) c = ndistclass-1;
+ if( c >= maxdistclass ) c = maxdistclass-1; // only the upper bound is clamped; NOTE(review): a negative distance would give a negative index -- confirm distances are non-negative
+// fprintf( stderr, "pair %d-%d (%f), dist=%f -> c=%d\n", i, j, eff1[i] * eff2[j], smalldistmtx[i][j], c );
+ eff1s[c][i] = eff1[i]; // assigned, not accumulated: the full weight is stored in every class the member falls into
+ eff2s[c][j] = eff2[j];
+ matnum[i][j] = c;
+ }
+#endif
+#if 0 // disabled diagnostics: prints class assignments and weight totals, then exits
+ double totaleff;
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) reporterr( "whichmtx[%d][%d] = %d\n", i, j, matnum[i][j] );
+ for( c=0; c<maxdistclass; c++ )
+ {
+ totaleff = 0.0;
+ for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) totaleff += eff1s[c][i] * eff2s[c][j];
+ fprintf( stderr, "c=%d, sum totaleff1s-2s = %f\n", c, totaleff );
+ }
+ totaleff = 0.0; for( c=0; c<maxdistclass; c++ ) for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) totaleff += eff1s[c][i] * eff2s[c][j];
+ fprintf( stderr, "totaleff1s-2s = %f\n", totaleff );
+ totaleff = 0.0; for( i=0; i<n1; i++ ) for( j=0; j<n2; j++ ) totaleff += eff1[i] * eff2[j];
+ fprintf( stderr, "totaleff1-2 = %f\n", totaleff );
+
+ totaleff = 0.0; for( c=0; c<maxdistclass; c++ ) for( i=0; i<n1; i++ ) totaleff += eff1s[c][i];
+ fprintf( stderr, "totaleff1s = %f\n", totaleff );
+ totaleff = 0.0; for( c=0; c<maxdistclass; c++ ) for( i=0; i<n2; i++ ) totaleff += eff2s[c][i];
+ fprintf( stderr, "totaleff2s = %f\n", totaleff );
+ totaleff = 0.0; for( i=0; i<n1; i++ ) totaleff += eff1[i];
+ fprintf( stderr, "totaleff1 = %f\n", totaleff );
+ totaleff = 0.0; for( i=0; i<n2; i++ ) totaleff += eff2[i];
+ fprintf( stderr, "totaleff2 = %f\n", totaleff );
+ for( c=0; c<maxdistclass; c++ )
+ {
+ for( i=0; i<n1; i++ ) fprintf( stderr, "eff1s[%d][%d] = %f\n", c, i, eff1s[c][i] );
+ for( i=0; i<n2; i++ ) fprintf( stderr, "eff2s[%d][%d] = %f\n", c, i, eff2s[c][i] );
+ fprintf( stderr, "\n" );
+ }
+exit( 1 );
+#endif
+}
+
static void Writeoption2( FILE *fp, int cycle, double cut )
{
fprintf( fp, "%dth cycle\n", cycle );
#ifdef enablemultithread
static void freelocalarrays(
- float *tscorehistory,
+ double *tscorehistory, // widened from float by this change; freed below only when non-NULL
RNApair ***grouprna1, RNApair ***grouprna2,
RNApair *rnapairboth,
char *indication1, char *indication2,
+ double *distarr, // newly added buffer; released with free() below
double *effarr, double *effarrforlocalhom, double *effarr1, double *effarr2,
char **mseq1, char **mseq2,
char **localcopy,
int *gapmap1, int *gapmap2,
double *effarr1_kozo, double *effarr2_kozo, double *effarr_kozo,
- char **pair,
- LocalHom *** localhomshrink
+ int **memlist, // added where the old char **pair parameter is removed; released with FreeIntMtx()
+ char *pairbuf, // released with free() below
+ LocalHom *** localhomshrink,
+ char *swaplist, // released with free() near the end of the function
+ double **smalldistmtx, // optional: freed below only when non-NULL
+ double ***scoringmatrices, // optional: freed below only when non-NULL
+ double **eff1s, double **eff2s, // optional: each freed below only when non-NULL
+ int **whichmtx // optional: freed below only when non-NULL
)
{
// fprintf( stderr, "Skipping freelocalarrays\n" );
int i;
if( commonIP ) FreeIntMtx( commonIP );
commonIP = NULL;
- Falign( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL );
- Falign_localhom( NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL,NULL, 0, NULL );
+ Falign( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, NULL ); // NOTE(review): this and the next three all-NULL/zero calls look like "release internal buffers" requests to each aligner -- confirm in the callees
+ Falign_localhom( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL,NULL, 0, NULL ); // argument list extended to match the new signature used elsewhere in this change
+ D__align( NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL, 0, 0 ); // newly added to this teardown sequence
+ partA__align_variousdist( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, 0, 0, NULL, NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0, NULL ); // newly added to this teardown sequence
+
+
if( rnakozo && rnaprediction == 'm' )
{
free( grouprna1 ); // nakamimo?
free( grouprna2 ); // nakamimo?
}
- free( tscorehistory );
+ if( tscorehistory ) free( tscorehistory ); // NULL check added; free( NULL ) is also a no-op in standard C
free( indication1 );
free( indication2 );
+ free( distarr ); // pairs with the new distarr parameter
free( effarr );
free( effarrforlocalhom );
free( effarr1 );
free( effarr2 );
free( mseq1 );
free( mseq2 );
- FreeCharMtx( localcopy );
+ if( localcopy ) FreeCharMtx( localcopy ); // now tolerates localcopy == NULL
free( gapmap1 );
free( gapmap2 );
free( effarr2_kozo );
free( effarr_kozo );
- FreeCharMtx( pair );
+ FreeIntMtx( memlist ); // unconditional, unlike the guarded matrix frees below
+ free( pairbuf ); // pairs with the new pairbuf parameter
+
+ if( smalldistmtx ) FreeDoubleMtx( smalldistmtx ); // allocation sites visible later in this file set this and the next four pointers to NULL when specificityconsideration == 0
+ if( scoringmatrices ) FreeDoubleCub( scoringmatrices ); // three-level array, so the Cub (not Mtx) free routine is used
+ if( eff1s ) FreeDoubleMtx( eff1s );
+ if( eff2s ) FreeDoubleMtx( eff2s );
+ if( whichmtx ) FreeIntMtx( whichmtx );
if( rnakozo ) free( rnapairboth );
free( localhomshrink[i] ); // nakamimo??
}
free( localhomshrink );
+ free( swaplist ); // NOTE(review): the allocation site seen later in this file initialises swaplist to NULL and only calloc()s it when ntarget < locnjob, so this can be free( NULL )
}
}
int *collectingpt = targ->collectingpt;
int *jobposintpt = targ->jobposintpt;
int nkozo = targ->nkozo;
- float *gainlist = targ->gainlist;
- float *tscorelist = targ->tscorelist;
+ double *gainlist = targ->gainlist;
+ double *tscorelist = targ->tscorelist;
int *generationofinput = targ->generationofinput;
int *subgenerationpt = targ->subgenerationpt;
- float *basegainpt = targ->basegainpt;
+ double *basegainpt = targ->basegainpt;
char *kozoarivec = targ->kozoarivec;
char **mastercopy = targ->mastercopy;
char ***candidates = targ->candidates;
int alloclen = targ->alloclen;
Node * stopol = targ->stopol;
int ***topol = targ->topol;
-// double **len = targ->len;
- float **tscorehistory_detail = targ->tscorehistory_detail;
+ double **len = targ->len;
+ double **tscorehistory_detail = targ->tscorehistory_detail;
int *finishpt = targ->finishpt;
+ int **skipthisbranch = targ->skipthisbranch;
+ double **distmtx = targ->distmtx;
+ int ntarget = targ->ntarget;
+ int *targetmap = targ->targetmap;
- int i, j, k, l, ii;
- float gain;
+ int i, k, l, ii;
+ double gain;
int iterate;
- char **pair;
+ int **memlist;
+ char *pairbuf;
int locnjob;
int s1, s2;
int clus1, clus2;
char **localcopy;
char **mseq1, **mseq2;
+ double *distarr; // re-calc
double *effarr, *effarr_kozo; // re-calc
double *effarr1, *effarr2, *effarr1_kozo, *effarr2_kozo;
char *indication1, *indication2;
RNApair ***grouprna1, ***grouprna2;
RNApair *rnapairboth;
LocalHom ***localhomshrink;
+ char *swaplist;
int *gapmap1, *gapmap2;
- float tscore, mscore, oimpmatch, impmatch;
+ double tscore, mscore;
+ double oimpmatchdouble;
+ double impmatchdouble;
int identity;
double tmpdouble;
- float naivescore0 = 0, naivescore1;
+// double naivescore0 = 0, naivescore1;
double *effarrforlocalhom;
- float *tscorehistory;
+ double *tscorehistory;
int intdum;
#if 0
int oscillating;
int lin, ldf;
#endif
- float maxgain;
+ double maxgain;
int bestthread;
int branchpos;
int subgenerationatfirst;
int myjob;
int converged2 = 0;
int chudanres;
+ double **smalldistmtx;
+ double ***scoringmatrices;
+ double **eff1s, **eff2s;
+ int **whichmtx;
locnjob = njob;
exit( 1 );
}
- tscorehistory = calloc( maxiter, sizeof( float ) );
+ tscorehistory = calloc( maxiter, sizeof( double ) );
if( rnakozo && rnaprediction == 'm' )
{
grouprna1 = grouprna2 = NULL;
}
- indication1 = AllocateCharVec( njob*3+50 );
- indication2 = AllocateCharVec( njob*3+50 );
+ indication1 = AllocateCharVec( 150 );
+ indication2 = AllocateCharVec( 150 );
+ distarr = AllocateDoubleVec( locnjob );
effarr = AllocateDoubleVec( locnjob );
effarrforlocalhom = AllocateDoubleVec( locnjob );
effarr1 = AllocateDoubleVec( locnjob );
localcopy = AllocateCharMtx( locnjob, alloclen );
gapmap1 = AllocateIntVec( alloclen );
gapmap2 = AllocateIntVec( alloclen );
+ if( specificityconsideration != 0 )
+ {
+ smalldistmtx = AllocateDoubleMtx( locnjob, locnjob ); // ookii?
+ scoringmatrices = AllocateDoubleCub( maxdistclass, nalphabets, nalphabets );
+ makescoringmatrices( scoringmatrices, n_dis_consweight_multi );
+ eff1s = AllocateDoubleMtx( maxdistclass, locnjob );
+ eff2s = AllocateDoubleMtx( maxdistclass, locnjob );
+ whichmtx = AllocateIntMtx( locnjob, locnjob );
+ }
+ else
+ {
+ smalldistmtx = NULL;
+ scoringmatrices = NULL;
+ eff1s = eff2s = NULL;
+ whichmtx = NULL;
+ }
effarr1_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru.
effarr2_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru.
for( i=0; i<locnjob; i++ )
effarr_kozo[i] = effarr1_kozo[i] = effarr2_kozo[i] = 0.0;
- pair = AllocateCharMtx( locnjob, locnjob );
+ memlist = AllocateIntMtx( 2, locnjob );
+ pairbuf = AllocateCharVec( locnjob );
if( rnakozo ) rnapairboth = (RNApair *)calloc( alloclen, sizeof( RNApair ) );
+ swaplist = NULL;
if( constraint )
{
+ if( ntarget < locnjob ) swaplist = calloc( njob, sizeof( char ) );
localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) );
for( i=0; i<njob; i++)
{
}
}
-
if( thread_no == 0 )
{
*ntrypt = 0;
grouprna1, grouprna2,
rnapairboth,
indication1, indication2,
+ distarr,
effarr, effarrforlocalhom, effarr1, effarr2,
mseq1, mseq2,
localcopy,
gapmap1, gapmap2,
effarr1_kozo, effarr2_kozo, effarr_kozo,
- pair,
- localhomshrink
+ memlist, pairbuf,
+ localhomshrink,
+ swaplist,
+ smalldistmtx,
+ scoringmatrices,
+ eff1s, eff2s,
+ whichmtx
);
// return( NULL );
pthread_exit( NULL );
grouprna1, grouprna2,
rnapairboth,
indication1, indication2,
+ distarr,
effarr, effarrforlocalhom, effarr1, effarr2,
mseq1, mseq2,
localcopy,
gapmap1, gapmap2,
effarr1_kozo, effarr2_kozo, effarr_kozo,
- pair,
- localhomshrink
+ memlist, pairbuf,
+ localhomshrink,
+ swaplist,
+ smalldistmtx,
+ scoringmatrices,
+ eff1s, eff2s,
+ whichmtx
);
return( NULL );
pthread_exit( NULL );
grouprna1, grouprna2,
rnapairboth,
indication1, indication2,
+ distarr,
effarr, effarrforlocalhom, effarr1, effarr2,
mseq1, mseq2,
localcopy,
gapmap1, gapmap2,
effarr1_kozo, effarr2_kozo, effarr_kozo,
- pair,
- localhomshrink
+ memlist, pairbuf,
+ localhomshrink,
+ swaplist,
+ smalldistmtx,
+ scoringmatrices,
+ eff1s, eff2s,
+ whichmtx
);
return( NULL );
pthread_exit( NULL );
if( l == locnjob-2 ) k = 1;
else k = myjob - l * 2;
// fprintf( stderr, "JOB l=%d, k=%d\n", l, k );
+
+
branchpos = myjob;
(*jobposintpt)++;
iterate = *generationofmastercopypt;
(*ntrypt)++;
pthread_mutex_unlock( targ->mutex );
-
+// fprintf( stderr, "\n IRANAI IRANAI *jobposintpt=%d, nbranch = %d\n", *jobposintpt, nbranch );
// fprintf( stderr, "branchpos = %d (thread %d)\n", branchpos, thread_no );
fprintf( stderr, "STEP %03d-%03d-%d (Thread %d) ", iterate+1, l+1, k, thread_no );
fprintf( stderr, "STEP %03d-%03d-%d (thread %d) %s ", iterate+1, l+1, k, thread_no, use_fft?"\n":"\n" );
#endif
- for( i=0; i<locnjob; i++ ) for( j=0; j<locnjob; j++ ) pair[i][j] = 0;
- OneClusterAndTheOther( locnjob, pair, &s1, &s2, topol, l, k );
+// for( i=0; i<2; i++ ) for( j=0; j<locnjob; j++ ) pair[i][j] = 0;
+ distFromABranch( locnjob, distarr, stopol, topol, len, l, k ); // ato de idou
+ OneClusterAndTheOther_fast( locnjob, memlist[0], memlist[1], &s1, &s2, pairbuf, topol, l, k, smalldistmtx, distmtx, distarr );
+
+
+// reporterr( "\n\n\n\n##### memlist[0][0], memlist[1][0] = %d, %d\n", memlist[0][0]+1, memlist[1][0]+1 );
+
+
+
#if 0
fprintf( stderr, "STEP%d-%d\n", l, k );
for( i=0; i<locnjob; i++ )
}
else if( weight == 4 )
{
+
weightFromABranch( locnjob, effarr, stopol, topol, l, k );
if( nkozo ) // hitomadu single weight.
{
if( nkozo )
{
- double tmptmptmp;
- tmptmptmp = 0.0;
- clus1 = conjuctionfortbfast_kozo( &tmptmptmp, pair, s1, localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
+// double tmptmptmp;
+// tmptmptmp = 0.0;
+// clus1 = conjuctionfortbfast_kozo( &tmptmptmp, pair[0], s1, localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
+ clus1 = fastconjuction_noname_kozo( memlist[0], localcopy, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
for( i=0; i<clus1; i++ ) effarr1_kozo[i] *= 1.0; // 0.5 ga sairyo ?
- tmptmptmp = 0.0;
- clus2 = conjuctionfortbfast_kozo( &tmptmptmp, pair, s2, localcopy, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
+// tmptmptmp = 0.0;
+// clus2 = conjuctionfortbfast_kozo( &tmptmptmp, pair[1], s2, localcopy, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
+ clus2 = fastconjuction_noname_kozo( memlist[1], localcopy, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
for( i=0; i<clus2; i++ ) effarr2_kozo[i] *= 1.0; // 0.5 ga sairyo ?
#if 0
}
else
{
- clus1 = conjuctionfortbfast( pair, s1, localcopy, mseq1, effarr1, effarr, indication1 );
- clus2 = conjuctionfortbfast( pair, s2, localcopy, mseq2, effarr2, effarr, indication2 );
+// clus1 = conjuctionfortbfast( pair[0], s1, localcopy, mseq1, effarr1, effarr, indication1 );
+// clus2 = conjuctionfortbfast( pair[1], s2, localcopy, mseq2, effarr2, effarr, indication2 );
+ clus1 = fastconjuction_noname( memlist[0], localcopy, mseq1, effarr1, effarr, indication1, minimumweight ); // 2015/Apr/18
+ clus2 = fastconjuction_noname( memlist[1], localcopy, mseq2, effarr2, effarr, indication2, minimumweight ); // 2015/Apr/18
}
if( rnakozo && rnaprediction == 'm' )
{
- makegrouprnait( grouprna1, singlerna, pair, s1 );
- makegrouprnait( grouprna2, singlerna, pair, s2 );
+// makegrouprnait( grouprna1, singlerna, pair[0], s1 );
+// makegrouprnait( grouprna2, singlerna, pair[1], s2 );
+ makegrouprna( grouprna1, singlerna, memlist[0] );
+ makegrouprna( grouprna2, singlerna, memlist[1] );
}
+ if( smalldistmtx )
+ {
+ classifypairs( clus1, eff1s, effarr1, clus2, eff2s, effarr2, smalldistmtx, whichmtx, maxdistclass );
+ }
+
+ if( alg == 'd' ) // Ichijiteki, koredeha scorecheck ga dekinai. gapmap ni taiou surumade
+ {
+ commongappick( clus1, mseq1 );
+ commongappick( clus2, mseq2 );
+ }
+
if( score_check == 2 )
{
fprintf( stderr, "Score_check 2 is not supported in the multithread version.\n" );
}
else if( score_check )
{
- if( RNAscoremtx == 'r' )
- intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
- else
- intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
-
if( constraint )
{
- shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
-// weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink ); // >>>
- oimpmatch = 0.0;
+ if( RNAscoremtx == 'r' )
+ intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+ else
+ intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+
+// shrinklocalhom( pair, int s1, int s2, localhomtable, localhomshrink );
+// msshrinklocalhom( pair[0], pair[1], s1, s2, localhomtable, localhomshrink );
+ if( ntarget < njob )
+ msshrinklocalhom_fast_target( memlist[0], memlist[1], localhomtable, localhomshrink, swaplist, targetmap ); // swaplist hitsuyou!!
+ else
+ msshrinklocalhom_fast_half( memlist[0], memlist[1], localhomtable, localhomshrink );
+ oimpmatchdouble = 0.0;
if( use_fft )
{
if( alg == 'Q' )
{
- part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) part_imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
+ fprintf( stderr, "'Q' is no longer supported\n" );
+ exit( 1 );
+ }
+ else if( alg == 'd' )
+ {
+ imp_match_init_strictD( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
+ if( rnakozo ) imp_rnaD( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
for( i=length-1; i>=0; i-- )
{
- oimpmatch += part_imp_match_out_scQ( i, i );
+ oimpmatchdouble += (double)imp_match_out_scD( i, i );
// fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] );
}
}
else
{
- part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
for( i=length-1; i>=0; i-- )
{
- oimpmatch += part_imp_match_out_sc( i, i );
+ oimpmatchdouble += (double)part_imp_match_out_sc( i, i );
// fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] );
}
}
{
if( alg == 'Q' )
{
- imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
-
- for( i=length-1; i>=0; i-- )
- {
- oimpmatch += imp_match_out_scQ( i, i );
-// fprintf( stderr, "#### i=%d, initial impmatch = %f\n", i, oimpmatch );
- }
+ fprintf( stderr, "'Q' is no longer supported\n" );
+ exit( 1 );
}
else
{
- imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
fprintf( stderr, "not supported\n" );
exit( 1 );
for( i=length-1; i>=0; i-- )
{
- oimpmatch += imp_match_out_sc( i, i );
+ oimpmatchdouble += (double)imp_match_out_sc( i, i );
// fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] );
}
}
}
else
{
- oimpmatch = 0.0;
+ if( RNAscoremtx == 'r' )
+ intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+ else
+ {
+ if( smalldistmtx )
+#if 1
+ intergroup_score_multimtx( whichmtx, scoringmatrices, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
+#else
+ intergroup_score_dynmtx( smalldistmtx, amino_dis, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+#endif
+ else
+ intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+ }
+ oimpmatchdouble = 0.0;
}
// fprintf( stderr, "#### tmpdouble = %f\n", tmpdouble );
- mscore = (double)oimpmatch + tmpdouble;
+ mscore = oimpmatchdouble + tmpdouble;
}
else
{
fprintf( stderr, "score_check = %d\n", score_check );
- fprintf( stderr, "Not supported\n" );
+ fprintf( stderr, "Not supported. Please add --threadit 0 to disable the multithreading in the iterative refinement calculation.\n" );
exit( 1 );
}
// if( rnakozo ) foldalignedrna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, rnapairboth );
// if( !use_fft && !rnakozo )
+// if( !use_fft )
if( !use_fft )
{
commongappick_record( clus1, mseq1, gapmap1 );
for( i=0; i<clus2; i++ ) printf( "%f ", effarr2[i] );
printf( "\n" );
#endif
- if( constraint == 2 )
+ if( !skipthisbranch[l][k] )
{
- if( use_fft )
- {
-// if( alg == 'Q' )
-// part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
-// else
-// part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
- chudanres = 0;
- Falign_localhom( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, gapmap1, gapmap2, subgenerationpt, subgenerationatfirst, &chudanres );
-// fprintf( stderr, "##### impmatch = %f\n", impmatch );
- if( chudanres && parallelizationstrategy == BAATARI2 )
- {
-// fprintf( stderr, "#### yarinaoshi!!! INS-i\n" );
- goto yarinaoshi;
- }
- }
- else
- {
- if( alg == 'Q' )
- {
- float wm;
-// imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap wo tsukuttakara iranai.
-// if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, gapmap1, gapmap2, rnapairboth );
- wm = Q__align_gapmap( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL, gapmap1, gapmap2 );
- fprintf( stderr, "wm = %f\n", wm );
-#if 0
- fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch );
- naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch;
- fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
-#if 0 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
-// fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 );
-// rewind( stdout );
-// for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
-// for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
-// exit( 1 );
- }
-#endif
-#endif
- }
- else if( alg == 'R' )
- {
- float wm;
- imp_match_init_strictR( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap ha mada
- wm = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL );
-// fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch );
- naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch;
-// fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: %f->%f UP!\n", clus1, clus2, naivescore0, naivescore1 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
-#if 0 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
-// fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 );
- rewind( stdout );
- for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- exit( 1 );
- }
-#endif
- }
- else if( alg == 'H' )
+ if( constraint == 2 )
+ {
+ if( use_fft )
{
- float wm;
- imp_match_init_strictH( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap ha mada
- wm = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL );
- fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch );
- naivescore1 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch;
- fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
+// if( alg == 'Q' )
+// part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
+// else
+// part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ chudanres = 0;
+ if( alg == 'd' )
+ D__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatchdouble, NULL, NULL, NULL, NULL, subgenerationpt, subgenerationatfirst, &chudanres, 1, 1 );
+
else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
-#if 0 // chuui
- if( abs( wm - naivescore1 ) > 100 )
+ Falign_localhom( whichmtx, scoringmatrices, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, eff1s, eff2s, clus1, clus2, alloclen, localhomshrink, &impmatchdouble, gapmap1, gapmap2, subgenerationpt, subgenerationatfirst, &chudanres );
+// fprintf( stderr, "##### impmatch = %f\n", impmatch );
+ if( chudanres && parallelizationstrategy == BAATARI2 )
{
-// fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 );
-// for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
-// for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
-// exit( 1 );
+// fprintf( stderr, "#### yarinaoshi!!! INS-i\n" );
+ goto yarinaoshi;
}
-#endif
}
else
{
-// imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
- A__align_gapmap( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, gapmap1, gapmap2 );
- fprintf( stderr, "A__align_gapmap\n" );
-// fprintf( stderr, "##### impmatch = %f\n", impmatch );
+ fprintf( stderr, "Not supported\n" );
+ exit( 1 );
}
}
- }
- else if( use_fft )
- {
- float totalwm;
- chudanres = 0;
- totalwm = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum, subgenerationpt, subgenerationatfirst, &chudanres );
- if( chudanres && parallelizationstrategy == BAATARI2 )
+ else if( use_fft )
{
-// fprintf( stderr, "#### yarinaoshi!!! FFT-NS-i\n" );
- goto yarinaoshi;
- }
-
-// fprintf( stderr, "totalwm = %f\n", totalwm );
#if 0
- if( alg == 'Q' )
- {
- fprintf( stderr, "totalwm = %f\n", totalwm );
- naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
-#if 1 // chuui
- if( totalwm != 0.0 && abs( totalwm - naivescore1 ) > 100 )
+ if( alg == 'd' )
{
-// fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 );
-// for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
-// for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
-// exit( 1 );
+ D__align_variousdist( whichmtx, scoringmatrices, NULL, mseq1, mseq2, effarr1, effarr2, eff1s, eff2s, clus1, clus2, alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, subgenerationpt, subgenerationatfirst, &chudanres, 1, 1 );
}
-#endif
- }
-#endif
- if( alg == 'R' )
- {
- fprintf( stderr, "totalwm = %f\n", totalwm );
- naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
-#if 1 // chuui
- if( totalwm != 0.0 && abs( totalwm - naivescore1 ) > 100 )
+#endif
{
-// fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 );
-// for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
-// for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
-// exit( 1 );
+ double totalwm;
+ chudanres = 0;
+// totalwm = Falign( smalldistmtx, NULL, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, NULL, NULL, clus1, clus2, alloclen, &intdum, subgenerationpt, subgenerationatfirst, &chudanres );
+ totalwm = Falign( whichmtx, scoringmatrices, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, eff1s, eff2s, clus1, clus2, alloclen, &intdum, subgenerationpt, subgenerationatfirst, &chudanres );
}
- }
-#endif
- }
- else
- {
- if( alg == 'M' )
- {
- chudanres = 0;
- MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, subgenerationpt, subgenerationatfirst, &chudanres, outgap, outgap );
if( chudanres && parallelizationstrategy == BAATARI2 )
{
-// fprintf( stderr, "#### yarinaoshi!!! NW-NS-i\n" );
+// fprintf( stderr, "#### yarinaoshi!!! FFT-NS-i\n" );
goto yarinaoshi;
}
+
}
- else if( alg == 'A' )
+ else
{
- A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); //outgap==1
+ fprintf( stderr, "\n\nUnexpected error. Please contact kazutaka.katoh@aist.go.jp\n\n\n" );
+ exit( 1 );
}
- else if( alg == 'Q' )
+// fprintf( stderr, "## impmatch = %f\n", impmatch );
+
+#if 1
+ if( parallelizationstrategy == BAATARI2 && *subgenerationpt != subgenerationatfirst )
{
- float wm;
- wm = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL );
- fprintf( stderr, "wm = %f\n", wm );
- fprintf( stderr, "impmatch = %f\n", impmatch );
- naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN!\n", clus1, clus2 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
-#if 1 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
-// fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 );
-// rewind( stderr );
-// rewind( stdout );
-// for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
-// for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
-// exit( 1 );
- }
-#endif
+// fprintf( stderr, "\nYarinaoshi2!! (Thread %d)\n", thread_no );
+ goto yarinaoshi;
}
- else if( alg == 'R' )
- {
- float wm;
- wm = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL );
- naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
-#if 1 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
-// fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 );
-// rewind( stderr );
-// rewind( stdout );
-// for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
-// for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
-// exit( 1 );
- }
#endif
- }
- else if( alg == 'H' )
- {
- float wm;
- wm = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL );
- naivescore1 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- {
- fprintf( stderr, "%d-%d, ns: DOWN!\n", clus1, clus2 );
- }
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
+
+ identity = !strcmp( localcopy[s1], mastercopy[s1] );
+ identity *= !strcmp( localcopy[s2], mastercopy[s2] );
+ fprintf( stderr, "%03d-%04d-%d (thread %4d) identical \r", iterate+1, *ndonept, k, thread_no );
-#if 0 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
- rewind( stdout );
- for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- exit( 1 );
- }
-#endif
- }
- else if( alg == 'a' )
- {
- Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
- }
- else ErrorExit( "Sorry!" );
}
-// fprintf( stderr, "## impmatch = %f\n", impmatch );
-
-#if 1
- if( parallelizationstrategy == BAATARI2 && *subgenerationpt != subgenerationatfirst )
+ else
{
-// fprintf( stderr, "\nYarinaoshi2!! (Thread %d)\n", thread_no );
- goto yarinaoshi;
+ identity = 1;
+ fprintf( stderr, "%03d-%04d-%d (thread %4d) skip \r", iterate+1, *ndonept, k, thread_no );
}
-#endif
-
- identity = !strcmp( localcopy[s1], mastercopy[s1] );
- identity *= !strcmp( localcopy[s2], mastercopy[s2] );
/* Bug? : idnetitcal but score change when scoreing mtx != JTT */
if( identity )
{
tscore = mscore;
-// if( !devide ) fprintf( trap_g, "tscore = %f identical.\n", tscore );
-// fprintf( stderr, " identical." );
- fprintf( stderr, "%03d-%04d-%d (thread %4d) identical \r", iterate+1, *ndonept, k, thread_no );
}
else
{
intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
#endif
- tscore = impmatch + tmpdouble;
+ tscore = impmatchdouble + tmpdouble;
// fprintf( stderr, "tmpdouble=%f, impmatch = %f -> %f, tscore = %f\n", tmpdouble, oimpmatch, impmatch, tscore );
}
else
{
- intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
+ if( smalldistmtx )
+#if 1
+ intergroup_score_multimtx( whichmtx, scoringmatrices, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
+#else
+ intergroup_score_dynmtx( smalldistmtx, amino_dis, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
+#endif
+ else
+ intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
tscore = tmpdouble;
}
// fprintf( stderr, "#######ii=%d, iterate=%d score = %f -> %f \n", ii, iterate , mscore, tscore );
goto yarinaoshi;
}
#endif
+// reporterr( "tscore = %f, mscore = %f\n", tscore, mscore );
gain = tscore - ( mscore - cut/100.0*mscore );
if( gain > 0 )
{
tscore = mscore;
}
}
+#if FULLSCORE
+ {
+ int j;
+ double fullscore = 0.0;
+ for( i=1; i<locnjob; i++ ) for( j=0; j<i; j++ )
+ fullscore += (double)naivepairscore11( localcopy[i], localcopy[j], penalty );
+ reporterr( "\n######## fullscore = %f\n", fullscore / (locnjob*(locnjob-1)/2) );
+ }
+#endif
+
+
converged2 = 0;
for( ii=iterate-2; ii>=0; ii-=1 )
{
int TreeDependentIteration( int locnjob, char **name, int nlen[M],
char **aseq, char **bseq, int ***topol, double **len,
+ double **distmtx,
+ int **skipthisbranch,
int alloclen, LocalHom **localhomtable,
RNApair ***singlerna,
- int nkozo, char *kozoarivec )
+ int nkozo, char *kozoarivec,
+ int ntarget, int *targetmap, int *targetmapr )
{
int i, j, k, l, iterate, ii, iu, ju;
int lin, ldf, length;
int s1, s2;
static double **imanoten;
static Node *stopol;
+ static double *distarr = NULL;
static double *effarrforlocalhom = NULL;
static double *effarr = NULL;
static double *effarr1 = NULL;
static int *branchnode = NULL;
static double **branchWeight = NULL;
static char **mseq1, **mseq2;
- static float ***history;
+ static double ***history;
FILE *trap;
double tscore, mscore;
int identity;
int converged;
int oscillating;
- float naivescore0 = 0.0; // by D.Mathog, a guess
- float naivescore1;
+// double naivescore0 = 0.0; // by D.Mathog, a guess
+// double naivescore1;
#if 0
char pair[njob][njob];
#else
- static char **pair;
+ static int **memlist;
+ static char *pairbuf;
#endif
#if DEBUG + RECORD
double score_for_check0, score_for_check1;
#endif
static char *indication1, *indication2;
static LocalHom ***localhomshrink = NULL;
- float impmatch = 0.0, oimpmatch = 0.0;
+ static char *swaplist = NULL;
+ double impmatchdouble = 0.0;
+ double oimpmatchdouble = 0.0;
static int *gapmap1;
static int *gapmap2;
double tmpdouble;
static RNApair *rnapairboth;
RNApair ***grouprna1, ***grouprna2;
double unweightedspscore;
+ static double **smalldistmtx;
+ static double ***scoringmatrices;
+ static double **eff1s, **eff2s;
+ static int **whichmtx;
+ int value;
if( rnakozo && rnaprediction == 'm' )
{
if( effarr == NULL ) /* locnjob == njob ni kagiru */
{
- indication1 = AllocateCharVec( njob*3+50 );
- indication2 = AllocateCharVec( njob*3+50 );
+ indication1 = AllocateCharVec( 150 );
+ indication2 = AllocateCharVec( 150 );
effarr = AllocateDoubleVec( locnjob );
+ distarr = AllocateDoubleVec( locnjob );
effarrforlocalhom = AllocateDoubleVec( locnjob );
effarr1 = AllocateDoubleVec( locnjob );
effarr2 = AllocateDoubleVec( locnjob );
gapmap1 = AllocateIntVec( alloclen );
gapmap2 = AllocateIntVec( alloclen );
if( score_check == 2 ) imanoten = AllocateDoubleMtx( njob, njob );
+ if( specificityconsideration != 0 )
+ {
+ smalldistmtx = AllocateDoubleMtx( locnjob, locnjob ); // ookii?
+ scoringmatrices = AllocateDoubleCub( maxdistclass, nalphabets, nalphabets );
+ makescoringmatrices( scoringmatrices, n_dis_consweight_multi );
+ eff1s = AllocateDoubleMtx( maxdistclass, locnjob );
+ eff2s = AllocateDoubleMtx( maxdistclass, locnjob );
+ whichmtx = AllocateIntMtx( locnjob, locnjob );
+ }
+ else
+ {
+ smalldistmtx = NULL;
+ scoringmatrices = NULL;
+ eff1s = eff2s = NULL;
+ whichmtx = NULL;
+ }
effarr1_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru.
effarr2_kozo = AllocateDoubleVec( locnjob ); // tsuneni allocate suru.
#if 0
#else
- pair = AllocateCharMtx( locnjob, locnjob );
+ pairbuf = AllocateCharVec( locnjob );
+ memlist = AllocateIntMtx( 2, locnjob );
if( rnakozo ) rnapairboth = (RNApair *)calloc( alloclen, sizeof( RNApair ) );
+ swaplist = NULL;
if( constraint )
{
+ if( ntarget < locnjob ) swaplist = calloc( njob, sizeof( char ) );
localhomshrink = (LocalHom ***)calloc( njob, sizeof( LocalHom ** ) );
for( i=0; i<njob; i++)
{
if( constraint )
{
counteff_simple( locnjob, topol, len, effarrforlocalhom );
- calcimportance( locnjob, effarrforlocalhom, aseq, localhomtable );
+ if( ntarget < locnjob )
+ calcimportance_target( locnjob, ntarget, effarrforlocalhom, aseq, localhomtable, targetmap, targetmapr );
+ else
+ calcimportance_half( locnjob, effarrforlocalhom, aseq, localhomtable );
}
if( weight == 2 )
fprintf( stderr, "Not supported, weight=%d nkozo=%d.\n", weight, nkozo );
}
}
- else if( weight == 4 )
+// else if( weight == 4 )
+// else if( weight == 4 || weight == 0 )
+ else if( locnjob > 2 && ( weight == 4 || weight == 0 ) )
{
treeCnv( stopol, locnjob, topol, len, branchWeight );
- calcBranchWeight( branchWeight, locnjob, stopol, topol, len );
+ calcBranchWeight( branchWeight, locnjob, stopol, topol, len ); // IRU!!!
}
}
int jobposint;
int generationofmastercopy;
int subgeneration;
- float basegain;
+ double basegain;
int *generationofinput;
- float *gainlist;
- float *tscorelist;
+ double *gainlist;
+ double *tscorelist;
int ndone;
int ntry;
int collecting;
int maxiter;
char ***candidates;
int *branchtable;
- float **tscorehistory_detail;
+ double **tscorehistory_detail;
int finish;
nwa = nthread + 1;
pthread_cond_init( &collection_end, NULL );
pthread_cond_init( &collection_start, NULL );
- gainlist = calloc( nwa, sizeof( float ) );
- tscorelist = calloc( nwa, sizeof( float ) );
+ gainlist = calloc( nwa, sizeof( double ) );
+ tscorelist = calloc( nwa, sizeof( double ) );
branchtable = calloc( nbranch, sizeof( int ) );
generationofinput = calloc( nbranch, sizeof( int ) );
if( parallelizationstrategy == BESTFIRST )
targ[i].alloclen = alloclen;
targ[i].stopol = stopol;
targ[i].topol = topol;
-// targ[i].len = len;
+ targ[i].skipthisbranch = skipthisbranch;
+ targ[i].distmtx = distmtx;
+ targ[i].len = len;
targ[i].mutex = &mutex;
targ[i].collection_end = &collection_end;
targ[i].collection_start = &collection_start;
targ[i].tscorehistory_detail = tscorehistory_detail;
+ targ[i].ntarget = ntarget;
+ targ[i].targetmap = targetmap;
targ[i].finishpt = &finish;
pthread_create( handle+i, NULL, athread, (void *)(targ+i) );
if( weight == 2 )
countnode_int( locnjob, topol, node );
- else if( weight == 4 )
+// else if( weight == 4 )
+// else if( weight == 4 || weight == 0 )
+ else if( locnjob > 2 && ( weight == 4 || weight == 0 ) )
{
treeCnv( stopol, locnjob, topol, len, branchWeight );
- calcBranchWeight( branchWeight, locnjob, stopol, topol, len );
+ calcBranchWeight( branchWeight, locnjob, stopol, topol, len ); // IRU!!!
}
trap = fopen( "hat2", "w" );
if( !trap ) ErrorExit( "Cannot open hat2." );
if( constraint )
{
counteff_simple( locnjob, topol, len, effarrforlocalhom );
- calcimportance( locnjob, effarrforlocalhom, aseq, localhomtable );
+ if( ntarget < locnjob )
+ calcimportance_target( locnjob, ntarget, effarrforlocalhom, aseq, localhomtable, targetmap, targetmapr );
+ else
+ calcimportance_half( locnjob, effarrforlocalhom, aseq, localhomtable );
}
}
if( l == locnjob-2 ) k = 1;
else k = myjob - l * 2;
#endif
+ #if 0 // IRANAI!!!!
+ fprintf( stderr, "\nSTEP %d-%d\n", l, k );
+ for( i=0; topol[l][k][i]!=-1; i++ ) fprintf( stderr, " %d ", topol[l][k][i]+1 );
+ fprintf( stderr, "\n" );
+ fprintf( stderr, "SKIP %d\n", skipthisbranch[l][k] );
+ #endif
#if 1
fprintf( stderr, "STEP %03d-%03d-%d ", iterate+1, l+1, k );
fflush( stderr );
#else
fprintf( stderr, "STEP %03d-%03d-%d %s", iterate+1, l+1, k, use_fft?"\n":"\n" );
#endif
- for( i=0; i<locnjob; i++ ) for( j=0; j<locnjob; j++ ) pair[i][j] = 0;
-
- OneClusterAndTheOther( locnjob, pair, &s1, &s2, topol, l, k );
- #if 0
+ if( skipthisbranch[l][k] )
+ {
+ fprintf( stderr, " skip. \r" );
+ continue;
+ }
+
+ distFromABranch( locnjob, distarr, stopol, topol, len, l, k ); // ato de dou. weight 4 igai demo tsukaeruyou ni.
+// for( i=0; i<locnjob; i++ ) // BUG??
+// for( i=0; i<2; i++ ) for( j=0; j<locnjob; j++ ) pair[i][j] = 0;
+ OneClusterAndTheOther_fast( locnjob, memlist[0], memlist[1], &s1, &s2, pairbuf, topol, l, k, smalldistmtx, distmtx, distarr );
+
+
+
+
+// reporterr( "\n\n##### memlist[0][0] = %d\n", memlist[0][0]+1 );
+// reporterr( "##### memlist[1][0] = %d\n\n", memlist[1][0]+1 );
+
+
+
+ #if 0 // IRANAI!!!!
fprintf( stderr, "STEP%d-%d\n", l, k );
- for( i=0; i<locnjob; i++ )
+ for( i=0; i<2; i++ )
{
for( j=0; j<locnjob; j++ )
{
}
else if( weight == 4 )
{
+
weightFromABranch( locnjob, effarr, stopol, topol, l, k );
+
#if 0
if( nkozo )
{
if( nkozo )
{
- #if 1
- double tmptmptmp;
- tmptmptmp = 0.0;
- clus1 = conjuctionfortbfast_kozo( &tmptmptmp, pair, s1, aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
+// double tmptmptmp;
+// tmptmptmp = 0.0;
+// clus1 = conjuctionfortbfast_kozo( &tmptmptmp, pair[0], s1, aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
+ clus1 = fastconjuction_noname_kozo( memlist[0], aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
for( i=0; i<clus1; i++ ) effarr1_kozo[i] *= 1.0; // 0.5 ga sairyo ?
- tmptmptmp = 0.0;
- clus2 = conjuctionfortbfast_kozo( &tmptmptmp, pair, s2, aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
+// tmptmptmp = 0.0;
+// clus2 = conjuctionfortbfast_kozo( &tmptmptmp, pair[1], s2, aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
+ clus2 = fastconjuction_noname_kozo( memlist[1], aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
for( i=0; i<clus2; i++ ) effarr2_kozo[i] *= 1.0; // 0.5 ga sairyo ?
#if 0
for( i=0; i<clus2; i++ ) fprintf( stderr, "effarr2_kozo[%d], effarr2[] = %f, %f\n", i, effarr2_kozo[i], effarr2[i] );
#endif
-
-
-
-
- #else
- clus1 = conjuctionfortbfast_kozo_BUG( pair, s1, aseq, mseq1, effarr1, effarr, effarr1_kozo, effarr_kozo, indication1 );
- clus2 = conjuctionfortbfast_kozo_BUG( pair, s2, aseq, mseq2, effarr2, effarr, effarr2_kozo, effarr_kozo, indication2 );
- #endif
}
else
{
- clus1 = conjuctionfortbfast( pair, s1, aseq, mseq1, effarr1, effarr, indication1 );
- clus2 = conjuctionfortbfast( pair, s2, aseq, mseq2, effarr2, effarr, indication2 );
+// clus1 = conjuctionfortbfast( pair[0], s1, aseq, mseq1, effarr1, effarr, indication1 );
+// clus2 = conjuctionfortbfast( pair[1], s2, aseq, mseq2, effarr2, effarr, indication2 );
+ clus1 = fastconjuction_noname( memlist[0], aseq, mseq1, effarr1, effarr, indication1, minimumweight ); // 2015/Apr/18
+ clus2 = fastconjuction_noname( memlist[1], aseq, mseq2, effarr2, effarr, indication2, minimumweight ); // 2015/Apr/18
}
-
if( rnakozo && rnaprediction == 'm' )
{
- makegrouprnait( grouprna1, singlerna, pair, s1 );
- makegrouprnait( grouprna2, singlerna, pair, s2 );
+// makegrouprnait( grouprna1, singlerna, pair[0], s1 );
+// makegrouprnait( grouprna2, singlerna, pair[1], s2 );
+ makegrouprna( grouprna1, singlerna, memlist[0] );
+ makegrouprna( grouprna2, singlerna, memlist[1] );
}
+ if( smalldistmtx )
+ {
+ classifypairs( clus1, eff1s, effarr1, clus2, eff2s, effarr2, smalldistmtx, whichmtx, maxdistclass );
+ }
+
+ if( alg == 'd' ) // Ichijiteki, koredeha scorecheck ga dekinai. gapmap ni taiou surumade
+ {
+ commongappick( clus1, mseq1 );
+ commongappick( clus2, mseq2 );
+ }
+
+
+
if( score_check == 2 )
{
+ fprintf( stderr, "Not supported\n" );
+ exit( 1 );
if( constraint )
{
- // msshrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
- shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
- oimpmatch = 0.0;
+// msshrinklocalhom( pair[0], pair[1], s1, s2, localhomtable, localhomshrink );
+ if( ntarget < locnjob )
+ msshrinklocalhom_fast_target( memlist[0], memlist[1], localhomtable, localhomshrink, swaplist, targetmap );
+ else
+ msshrinklocalhom_fast_half( memlist[0], memlist[1], localhomtable, localhomshrink );
+ oimpmatchdouble = 0.0;
if( use_fft )
{
if( alg == 'Q' )
{
- part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) part_imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
- for( i=length-1; i>=0; i-- ) oimpmatch += part_imp_match_out_scQ( i, i );
+ fprintf( stderr, "'Q' is no longer supported\n" );
+ exit( 1 );
}
else
{
- part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
- for( i=length-1; i>=0; i-- ) oimpmatch += part_imp_match_out_sc( i, i );
+ for( i=length-1; i>=0; i-- ) oimpmatchdouble += (double)part_imp_match_out_sc( i, i );
}
}
else
{
if( alg == 'Q' )
{
- imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
- for( i=length-1; i>=0; i-- ) oimpmatch += imp_match_out_scQ( i, i );
+ fprintf( stderr, "'Q' is no longer supported\n" );
+ exit( 1 );
}
else
{
- imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
fprintf( stderr, "not supported\n" );
exit( 1 );
}
}
else
{
- oimpmatch = 0.0;
+ oimpmatchdouble = 0.0;
}
+#if 0
tmpdouble = 0.0;
iu=0;
for( i=s1; i<locnjob; i++ )
{
- if( !pair[s1][i] ) continue;
+ if( !pair[0][i] ) continue;
ju=0;
for( j=s2; j<locnjob; j++ )
{
- if( !pair[s2][j] ) continue;
+ if( !pair[1][j] ) continue;
// fprintf( stderr, "i = %d, j = %d, effarr1=%f, effarr2=%f\n", i, j, effarr1[iu], effarr2[ju] );
tmpdouble += effarr1[iu] * effarr2[ju] * imanoten[MIN(i,j)][MAX(i,j)];
ju++;
}
iu++;
}
- mscore = oimpmatch + tmpdouble;
+#else // not yet checked
+ fprintf( stderr, "##### NOT YET CHECKED!!!!\n" );
+ exit( 1 );
+ tmpdouble = 0.0;
+ iu=0;
+ for( i=0; (s1=memlist[0][i])!=-1; i++ )
+ {
+ ju=0;
+ for( j=0; (s2=memlist[1][j])!=-1; j++ )
+ {
+ // fprintf( stderr, "i = %d, j = %d, effarr1=%f, effarr2=%f\n", i, j, effarr1[iu], effarr2[ju] );
+ tmpdouble += effarr1[iu] * effarr2[ju] * imanoten[MIN(s1,s2)][MAX(s1,s2)];
+ ju++;
+ }
+ iu++;
+ }
+#endif
+ mscore = oimpmatchdouble + tmpdouble;
}
else if( score_check )
{
- #if 1
- if( RNAscoremtx == 'r' )
- intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
- else
- intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
- #else
- intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
- #endif
-
if( constraint )
{
- shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
+ if( RNAscoremtx == 'r' )
+ intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+ else
+ intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+
+// shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
+ if( ntarget < locnjob )
+ msshrinklocalhom_fast_target( memlist[0], memlist[1], localhomtable, localhomshrink, swaplist, targetmap );
+ else
+ msshrinklocalhom_fast_half( memlist[0], memlist[1], localhomtable, localhomshrink );
// weightimportance4( clus1, clus2, effarr1, effarr2, localhomshrink ); // >>>
- oimpmatch = 0.0;
+ oimpmatchdouble = 0.0;
if( use_fft )
{
if( alg == 'Q' )
{
- part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) part_imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
+ fprintf( stderr, "'Q' is no longer supported\n" );
+ exit( 1 );
+ }
+ else if( alg == 'd' )
+ {
+ imp_match_init_strictD( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
+
for( i=length-1; i>=0; i-- )
{
- oimpmatch += part_imp_match_out_scQ( i, i );
- // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] );
+ oimpmatchdouble += (double)imp_match_out_scD( i, i );
+ // fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] );
}
}
else
{
- part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
for( i=length-1; i>=0; i-- )
{
- oimpmatch += part_imp_match_out_sc( i, i );
+ oimpmatchdouble += (double)part_imp_match_out_sc( i, i );
// fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] );
}
}
{
if( alg == 'Q' )
{
- imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
-
- for( i=length-1; i>=0; i-- )
- {
- oimpmatch += imp_match_out_scQ( i, i );
- // fprintf( stderr, "#### i=%d, initial impmatch = %f\n", i, oimpmatch );
- }
+ fprintf( stderr, "'Q' is no longer supported\n" );
+ exit( 1 );
}
else
{
- imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
fprintf( stderr, "not supported\n" );
exit( 1 );
for( i=length-1; i>=0; i-- )
{
- oimpmatch += imp_match_out_sc( i, i );
+ oimpmatchdouble += (double)imp_match_out_sc( i, i );
// fprintf( stderr, "#### i=%d, initial impmatch = %f seq1 = %c, seq2 = %c\n", i, oimpmatch, mseq1[0][i], mseq2[0][i] );
}
}
}
else
{
- oimpmatch = 0.0;
+ if( RNAscoremtx == 'r' )
+ intergroup_score_gapnomi( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+ else
+ {
+ if( smalldistmtx )
+#if 1
+ intergroup_score_multimtx( whichmtx, scoringmatrices, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
+#else
+ intergroup_score_dynmtx( offsetmtx, amino_dis, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // n_dis ha machigai
+#endif
+ else
+ intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // gappick mae denaito dame
+ }
+ oimpmatchdouble = 0.0;
}
// fprintf( stderr, "#### tmpdouble = %f\n", tmpdouble );
- mscore = (double)oimpmatch + tmpdouble;
+ mscore = oimpmatchdouble + tmpdouble;
}
else
{
- // fprintf( stderr, "score_check = %d\n", score_check );
- /* atode kousokuka */
+// fprintf( stderr, "score_check = %d\n" );
+#if 1
+ /* Required for the oscillation check! To be sped up later. */
intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
mscore = tmpdouble;
/* atode kousokuka */
+#else
+ mscore = 0.0;
+#endif
if( constraint )
{
- oimpmatch = 0.0;
- shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
+ oimpmatchdouble = 0.0;
+// shrinklocalhom( pair, s1, s2, localhomtable, localhomshrink );
+ if( ntarget < locnjob )
+ msshrinklocalhom_fast_target( memlist[0], memlist[1], localhomtable, localhomshrink, swaplist, targetmap );
+ else
+ msshrinklocalhom_fast_half( memlist[0], memlist[1], localhomtable, localhomshrink );
if( use_fft )
{
if( alg == 'Q' )
{
- part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) part_imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
+ fprintf( stderr, "'Q' is no longer supported\n" );
+ exit( 1 );
+ }
+ else if( alg == 'd' )
+ {
+ imp_match_init_strictD( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
+ if( rnakozo ) imp_rnaD( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
}
else
{
- part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
if( rnakozo ) part_imp_rna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
}
}
{
if( alg == 'Q' )
{
- imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
- if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, grouprna1, grouprna2, gapmap1, gapmap2, NULL );
+ fprintf( stderr, "'Q' is no longer supported\n" );
+ exit( 1 );
}
else
{
- imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
+ imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, swaplist, 1, memlist[0], memlist[1] );
fprintf( stderr, "Not supported\n" );
exit( 1 );
}
// if( rnakozo ) foldalignedrna( clus1, clus2, mseq1, mseq2, effarr1, effarr2, rnapairboth );
// if( !use_fft && !rnakozo )
+// if( !use_fft )
if( !use_fft )
{
commongappick_record( clus1, mseq1, gapmap1 );
// part_imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 );
// else
// part_imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
- Falign_localhom( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, gapmap1, gapmap2, NULL, 0, NULL );
- // fprintf( stderr, "##### impmatch = %f\n", impmatch );
- }
- else
- {
- if( alg == 'Q' )
+ if( alg == 'd' )
{
- float wm;
- // imp_match_init_strictQ( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap wo tsukuttakara iranai.
- // if( rnakozo ) imp_rnaQ( clus1, clus2, mseq1, mseq2, effarr1, effarr2, gapmap1, gapmap2, rnapairboth );
-
- wm = Q__align_gapmap( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL, gapmap1, gapmap2 );
- fprintf( stderr, "wm = %f\n", wm );
- #if 0
- fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch );
- naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch;
- fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
- #if 0 // chuui
- if( abs( wm - naivescore1 ) > 100 )
+ if( scoringmatrices ) // called by tditeration.c
{
- // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 );
- // rewind( stdout );
- // for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- // for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- // exit( 1 );
+ D__align_variousdist( whichmtx, scoringmatrices, NULL, mseq1, mseq2, effarr1, effarr2, eff1s, eff2s, clus1, clus2, alloclen, localhomshrink, &impmatchdouble, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 );
}
- #endif
- #endif
- }
- else if( alg == 'R' )
- {
- float wm;
- imp_match_init_strictR( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap ha mada
- wm = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL );
- // fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch );
- naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch;
- // fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: %f->%f UP!\n", clus1, clus2, naivescore0, naivescore1 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
- #if 0 // chuui
- if( abs( wm - naivescore1 ) > 100 )
{
- // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 );
- rewind( stdout );
- for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- exit( 1 );
+ D__align( n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatchdouble, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 );
}
- #endif
- }
- else if( alg == 'H' )
- {
- float wm;
- imp_match_init_strictH( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, localhomshrink, 1 ); // Ichijiteki, gapmap ha mada
- wm = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, NULL, NULL, NULL, NULL );
- fprintf( stderr, "##### impmatch = %f->%f\n", oimpmatch, impmatch );
- naivescore1 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty ) + impmatch;
- fprintf( stderr, "##### naivscore1 = %f\n", naivescore1 );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
- #if 0 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
- // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 );
- // for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- // for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- // exit( 1 );
- }
- #endif
}
else
- {
- // imp_match_init_strict( NULL, clus1, clus2, length, length, mseq1, mseq2, effarr1, effarr2, effarr1_kozo, effarr2_kozo, localhomshrink, 1 );
- A__align_gapmap( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, localhomshrink, &impmatch, gapmap1, gapmap2 );
- // fprintf( stderr, "##### impmatch = %f\n", impmatch );
- }
+ Falign_localhom( whichmtx, scoringmatrices, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, eff1s, eff2s, clus1, clus2, alloclen, localhomshrink, &impmatchdouble, gapmap1, gapmap2, NULL, 0, NULL );
+ // fprintf( stderr, "##### impmatch = %f\n", impmatch );
+ }
+ else
+ {
+ fprintf( stderr, "\n\nUnexpected error. Please contact kazutaka.katoh@aist.go.jp\n\n\n" );
+ exit( 1 );
}
}
else if( use_fft )
{
- float totalwm;
- totalwm = Falign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, &intdum, NULL, 0, NULL );
-
- // fprintf( stderr, "totalwm = %f\n", totalwm );
- #if 0
- if( alg == 'Q' )
+ double totalwm;
+#if 0
+ double dumdb;
+ // D is called via Falign.
+ if( alg == 'd' )
{
- fprintf( stderr, "totalwm = %f\n", totalwm );
- naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
- #if 1 // chuui
- if( totalwm != 0.0 && abs( totalwm - naivescore1 ) > 100 )
- {
- // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 );
- // for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- // for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- // exit( 1 );
- }
- #endif
+ D__align_variousdist( whichmtx, scoringmatrices, NULL, mseq1, mseq2, effarr1, effarr2, eff1s, eff2s, clus1, clus2, alloclen, NULL, &dumdb, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 );
}
- #endif
- if( alg == 'R' )
+ else
+#endif
{
- fprintf( stderr, "totalwm = %f\n", totalwm );
- naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
- #if 1 // chuui
- if( totalwm != 0.0 && abs( totalwm - naivescore1 ) > 100 )
- {
- // fprintf( stderr, "WARNING, totalwm=%f but naivescore=%f\n", totalwm, naivescore1 );
- // for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- // for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- // exit( 1 );
- }
+ totalwm = Falign( whichmtx, scoringmatrices, n_dis_consweight_multi, mseq1, mseq2, effarr1, effarr2, eff1s, eff2s, clus1, clus2, alloclen, &intdum, NULL, 0, NULL );
+
+ // fprintf( stderr, "totalwm = %f\n", totalwm );
}
- #endif
}
else
{
- if( alg == 'M' )
- {
- MSalignmm( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, NULL, NULL, NULL, NULL, 0, NULL, outgap, outgap );
- }
- else if( alg == 'A' )
- {
- A__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL, NULL, 0, NULL, 1, 1 ); // outgap==1
- }
- else if( alg == 'Q' )
- {
- float wm;
- wm = Q__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL );
- fprintf( stderr, "wm = %f\n", wm );
- fprintf( stderr, "impmatch = %f\n", impmatch );
- naivescore1 = naiveQpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN!\n", clus1, clus2 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
- #if 1 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
- // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 );
- // rewind( stderr );
- // rewind( stdout );
- // for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- // for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- // exit( 1 );
- }
- #endif
- }
- else if( alg == 'R' )
- {
- float wm;
- wm = R__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL );
- naivescore1 = naiveRpairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- fprintf( stderr, "%d-%d, ns: DOWN! %f->%f\n", clus1, clus2, naivescore0, naivescore1 );
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
- #if 1 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
- // fprintf( stderr, "WARNING, wm=%f but naivescore=%f\n", wm, naivescore1 );
- // rewind( stderr );
- // rewind( stdout );
- // for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- // for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- // exit( 1 );
- }
- #endif
- }
- else if( alg == 'H' )
- {
- float wm;
- wm = H__align( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen, NULL, &impmatch, NULL, NULL, NULL, NULL );
- naivescore1 = naivepairscore( clus1, clus2, mseq1, mseq2, effarr1, effarr2, penalty );
-
- if( naivescore1 > naivescore0 )
- fprintf( stderr, "%d-%d, ns: UP!\n", clus1, clus2 );
- else if( naivescore1 < naivescore0 )
- {
- fprintf( stderr, "%d-%d, ns: DOWN!\n", clus1, clus2 );
- }
- else
- fprintf( stderr, "%d-%d, ns: IDENTICAL\n", clus1, clus2 );
-
- #if 0 // chuui
- if( abs( wm - naivescore1 ) > 100 )
- {
- rewind( stdout );
- for( i=0; i<clus1; i++ ) printf( ">\n%s\n", mseq1[i] );
- for( i=0; i<clus2; i++ ) printf( ">\n%s\n", mseq2[i] );
- exit( 1 );
- }
- #endif
- }
- else if( alg == 'a' )
- {
- Aalign( mseq1, mseq2, effarr1, effarr2, clus1, clus2, alloclen );
- }
- else ErrorExit( "Sorry!" );
+ fprintf( stderr, "\n\nUnexpected error. Please contact kazutaka.katoh@aist.go.jp\n\n\n" );
+ exit( 1 );
}
// fprintf( stderr, "## impmatch = %f\n", impmatch );
/* Bug? : idnetitcal but score change when scoreing mtx != JTT */
+#if FULLSCORE
+ {
+ double fullscore = 0.0, fullscoreori = 0.0;
+ for( i=1; i<locnjob; i++ ) for( j=0; j<i; j++ )
+ fullscore += (double)naivepairscore11( aseq[i], aseq[j], penalty );
+ reporterr( "\nfullscore = %f\n", fullscore / (locnjob*(locnjob-1)/2) );
+ for( i=1; i<locnjob; i++ ) for( j=0; j<i; j++ )
+ fullscoreori += (double)naivepairscore11( bseq[i], bseq[j], penalty );
+ reporterr( "\nfullscoreori = %f\n", fullscoreori / (locnjob*(locnjob-1)/2) );
+ if( 0 && fullscoreori > fullscore )
+ {
+ for( i=0; i<clus1; i++ )
+ fprintf( stdout, ">group1\n%s\n", mseq1[i] );
+ for( i=0; i<clus2; i++ )
+ fprintf( stdout, ">group2\n%s\n", mseq2[i] );
+
+
+ for( i=0; i<locnjob; i++ )
+ fprintf( stdout, ">better alignment\n%s\n", bseq[i] );
+ exit( 1 );
+ }
+ }
+#endif
length = strlen( mseq1[0] );
{
tscore = mscore;
if( !devide ) fprintf( trap_g, "tscore = %f identical.\n", tscore );
- fprintf( stderr, " identical." );
+ fprintf( stderr, " identical. " );
converged++;
}
else
intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
#endif
- tscore = impmatch + tmpdouble;
+ tscore = impmatchdouble + tmpdouble;
// fprintf( stderr, "tmpdouble=%f, impmatch = %f -> %f, tscore = %f\n", tmpdouble, oimpmatch, impmatch, tscore );
}
else
{
- intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
+ if( smalldistmtx )
+#if 1
+ intergroup_score_multimtx( whichmtx, scoringmatrices, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
+#else
+ intergroup_score_dynmtx( offsetmtx, amino_dis, mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble ); // n_dis ha machigai
+#endif
+ else
+ intergroup_score( mseq1, mseq2, effarr1, effarr2, clus1, clus2, length, &tmpdouble );
tscore = tmpdouble;
}
// fprintf( stderr, "#######ii=%d, iterate=%d score = %f -> %f \n", ii, iterate , mscore, tscore );
converged++;
}
}
- fprintf( stderr, "\r" );
+ if( alg == 'd' )
+ fprintf( stderr, "\n" );
+ else
+ fprintf( stderr, "\r" );
- history[iterate][l][k] = (float)tscore;
+ history[iterate][l][k] = (double)tscore;
// fprintf( stderr, "tscore = %f\n", tscore );
if( weight || constraint ) fprintf( stderr, " (differs from the objective score)" );
fprintf( stderr, "\n\n" );
}
- return( 0 );
+ value = 0;
+ goto end;
}
if( iterate >= 1 )
{
oscillating = 0;
for( ii=iterate-2; ii>=0; ii-=2 )
{
- if( (float)tscore == history[ii][l][k] )
+ if( (double)tscore == history[ii][l][k] )
{
oscillating = 1;
break;
fprintf( stderr, "\n\n" );
}
#if 1 /* hujuubun */
- return( -1 );
+ value = -1;
+ goto end;
#endif
}
} /* if( iterate ) */
}
} /* for( iterate ) */
}
- return( 2 );
+ value = 2;
+
+ end:
+ if( grouprna1 ) free( grouprna1 );
+ if( grouprna2 ) free( grouprna2 );
+// freelocalarrays
+// (
+// NULL,
+// grouprna1, grouprna2,
+// rnapairboth,
+// indication1, indication2,
+// distarr,
+// effarr, effarrforlocalhom, effarr1, effarr2,
+// mseq1, mseq2,
+// NULL,
+// gapmap1, gapmap2,
+// effarr1_kozo, effarr2_kozo, effarr_kozo,
+// memlist, pairbuf,
+// localhomshrink,
+// smalldistmtx,
+// scoringmatrices,
+// eff1s, eff2s,
+// whichmtx
+// );
+// free( branchnode );
+// free( stopol );
+ return( value );
} /* int Tree... */
if ( stopol[locnseq-2].children[0] == &stopol[locnseq-3] ) i = 1;
else if( stopol[locnseq-2].children[1] == &stopol[locnseq-3] ) i = 0;
- else ErrorExit( "?\n" );
+ else ErrorExit( "ERROR in stopol ?\n" );
stopol[locnseq-3].length[2] = len[locnseq-2][0] + len[locnseq-2][1];
stopol[locnseq-3].weightptr[2] = &bw[locnseq-2][0];
}
}
-void weightFromABranch_rec( double *result, Node *ob, Node *op )
+static void distFromABranch_rec( double *result, Node *ob, Node *op ) /*
+ * Recursive helper used by distFromABranch().
+ * Expands node ob in the two directions that do not lead back to op and,
+ * for each of them, adds ob->length[dir] to result[n] for every member n
+ * listed in ob->members[dir] (list terminated by -1), then recurses into
+ * ob->children[dir] with ob as the node it came from.
+ *   result : accumulator indexed by member number; only incremented here
+ *   ob     : node being expanded
+ *   op     : node we arrived from; must occur exactly once in ob->children[0..2]
+ * Returns immediately when ob is a leaf; calls ErrorExit() when op does not
+ * occur exactly once among ob's three children.
+ */
{
int i, n, count;
int dir_ch[3], dir_pa;
#if DEBUG
+ fprintf( stderr, "In distFromABranch_rec, ob = %d\n", ob - stopol_g );
+#endif
+ if( isLeaf( *ob ) ) return; // a leaf has nothing further to expand
+ for( i=0, count=0; i<3; i++ ) // split the three directions of ob into "away from op" and "towards op"
+ {
+ if( ob->children[i] != op ) dir_ch[count++] = i; // direction leading away from op
+ else dir_pa = i; // direction leading back to op (recorded, not read again in this function)
+ }
+ if( count != 2 ) // op must match exactly one of the three directions
+ {
+#if DEBUG
+ fprintf( stderr, "Node No.%d has no child like No.%d \n", ob-stopol_g, op-stopol_g );
+#endif
+ ErrorExit( "Incorrect call of distFromABranch_rec" );
+ }
+ for( i=0; (n=ob->members[dir_ch[0]][i])!=-1; i++ ) // first outgoing direction: member list ends at -1
+ {
+ result[n] += ob->length[dir_ch[0]]; // add this branch's length to every member listed for it
+ }
+ distFromABranch_rec( result, ob->children[dir_ch[0]], ob ); // continue outward; ob becomes the "came from" node
+
+ for( i=0; (n=ob->members[dir_ch[1]][i])!=-1; i++ ) // second outgoing direction, handled the same way
+ {
+ result[n] += ob->length[dir_ch[1]];
+ }
+ distFromABranch_rec( result, ob->children[dir_ch[1]], ob );
+}
+
+void distFromABranch( int nseq, double *result, Node *stopol, int ***topol, double **len, int step, int LorR ) /*
+ * Fills result[0..nseq-1] with branch lengths accumulated outward from one
+ * branch of the tree, in both directions.
+ *   nseq   : number of entries written to result
+ *   result : output array; overwritten (not accumulated into) by this call
+ *   stopol : node array; stopol[step] and its children/members are consulted
+ *   topol  : topol[step][LorR][0] identifies which side of stopol[step] to use
+ *   len    : only read in the nseq == 2 case (len[0][0], len[0][1])
+ *   step   : index selecting the branch; step == nseq-2 is handled specially
+ *   LorR   : index into topol[step]; not read when step == nseq-2
+ * Calls ErrorExit() if no member list of stopol[step] starts with
+ * topol[step][LorR][0].
+ */
+{
+ Node *topNode, *btmNode;
+ int i;
+
+ if( nseq == 2 ) // two sequences: copy the two lengths of the single step directly
+ {
+ result[0] = len[0][0];
+ result[1] = len[0][1];
+// reporterr( "result[0] = %f\n", result[0] );
+// reporterr( "result[1] = %f\n", result[1] );
+ return;
+ }
+
+ if( step == nseq - 2 ) // last step: the node pair is fixed and LorR is not consulted
+ {
+ topNode = stopol[nseq-2].children[0];
+ btmNode = stopol + nseq-3;
+#if DEBUG
+ fprintf( stderr, "Now step == nseq-3, topNode = %d, btmNode = %d\n", topNode - stopol_g, btmNode-stopol_g ); // NOTE(review): message says nseq-3 but the enclosing test is step == nseq-2
+#endif
+ }
+
+ else
+ {
+ for( i=0; i<3; i++ ) // find the direction of stopol[step] whose first member matches the requested side
+ {
+ if( stopol[step].members[i][0] == topol[step][LorR][0] )
+ break;
+ }
+ if( i== 3 ) ErrorExit( "Incorrect call of distFromABranch." ); // no direction matched
+ btmNode = stopol[step].children[i];
+ topNode = stopol+step;
+ }
+
+ for( i=0; i<nseq; i++ ) result[i] = 0.0; // reset before accumulating
+ distFromABranch_rec( result, btmNode, topNode ); // expand from btmNode, away from topNode
+ distFromABranch_rec( result, topNode, btmNode ); // expand from topNode, away from btmNode
+#if 0
+ for( i=0; i<nseq; i++ )
+ fprintf( stdout, "w[%d] = %30.20f\n", i, result[i] );
+#endif
+// fprintf( stderr, "new weight!\n" );
+// for( i=0; i<nseq; i++ )
+// result[i] *= result[i];
+
+
+}
+
+static void weightFromABranch_rec( double *result, Node *ob, Node *op )
+{
+ int i, n, count;
+ int dir_ch[3], dir_pa;
+
+
+#if DEBUG
fprintf( stderr, "In weightFromABranch_rec, ob = %d\n", ob - stopol_g );
#endif
if( isLeaf( *ob ) ) return;
weightFromABranch_rec( result, topNode, btmNode );
#if 0
for( i=0; i<nseq; i++ )
- fprintf( stdout, "w[%d] = %f\n", i, result[i] );
+ fprintf( stdout, "w[%d] = %30.20f\n", i, result[i] );
#endif
// fprintf( stderr, "new weight!\n" );
// for( i=0; i<nseq; i++ )
done
make clean
-make CC="gcc-4.0" CFLAGS="-arch ppc64 -m64 -O3 -mmacosx-version-min=10.4 -isysroot/Developer/SDKs/MacOSX10.4u.sdk -DMACOSX_DEPLOYMENT_TARGET=10.4"
+make CC="$HOME/soft/gcc/usr/local/bin/gcc" CFLAGS="-O3 -m32 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5 -static-libgcc" LIBS="-lm -lpthread -lgcc_eh" ENABLE_MULTITHREAD="-Denablemultithread"
for prog in $progs; do
- mv $prog $prog.ppc64
+ mv $prog $prog.intel32
done
make clean
-make CC="gcc-4.0" CFLAGS="-arch ppc -m32 -O3 -mmacosx-version-min=10.4 -isysroot/Developer/SDKs/MacOSX10.4u.sdk -DMACOSX_DEPLOYMENT_TARGET=10.4"
+make CC="$HOME/soft/gcc/usr/local/bin/gcc" CFLAGS="-O3 -m64 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5 -static-libgcc" LIBS="-lm -lpthread -lgcc_eh" ENABLE_MULTITHREAD="-Denablemultithread"
for prog in $progs; do
- mv $prog $prog.ppc32
+ mv $prog $prog.intel64
done
make clean
-make CC="gcc-4.0" CFLAGS="-arch i386 -fast -m32 -mmacosx-version-min=10.4 -isysroot/Developer/SDKs/MacOSX10.4u.sdk -DMACOSX_DEPLOYMENT_TARGET=10.4"
+make CC="gcc-4.0" CFLAGS="-arch ppc64 -m64 -O3 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5" ENABLE_MULTITHREAD=""
for prog in $progs; do
- mv $prog $prog.intel32
+ mv $prog $prog.ppc64
done
make clean
-make CC="gcc-4.0" CFLAGS="-arch x86_64 -fast -m64 -mmacosx-version-min=10.4 -isysroot/Developer/SDKs/MacOSX10.4u.sdk -DMACOSX_DEPLOYMENT_TARGET=10.4"
+make CC="gcc-4.0" CFLAGS="-arch ppc -m32 -O3 -mmacosx-version-min=10.5 -isysroot/Developer/SDKs/MacOSX10.5.sdk -DMACOSX_DEPLOYMENT_TARGET=10.5" ENABLE_MULTITHREAD=""
for prog in $progs; do
- mv $prog $prog.intel64
+ mv $prog $prog.ppc32
done
+
set $progs
for prog in $progs; do
# lipo -create $prog.icc $prog.ppc32 $prog.ppc64 -output $prog
--- /dev/null
+#include "mltaln.h"
+
+int main()
+{
+ /* Write the compiled-in VERSION string to stdout verbatim, without a trailing newline, and exit with status 0. */
+ /* An explicit "%s" format is used so that VERSION is never interpreted as a printf format string, should it ever contain a '%'. */
+ fprintf( stdout, "%s", VERSION );
+ return( 0 );
+}
-----------------------------------------------------------------------
MAFFT: a multiple sequence alignment program
- version 6.857beta, 2011/05/30
+ version 7.310beta, 2017/Mar/17
http://mafft.cbrc.jp/alignment/software/
kazutaka.katoh@aist.go.jp
% cd ..
-2. INSTALL (select 2a or 2b [])
-2a. Install as root
+2. INSTALL (select 2a or 2b)
+2a. Install to /usr/local/ using root account
# cd core
# make install
# cd ..
the binaries in the /somewhere/else/ directory are used, instead
of those in /usr/local/libexec/mafft/.
-2b. Install without being root
+2b. Install to non-default location (root account is not necessary)
% cd core/
Edit the first line of Makefile
From:
- PREFIX = ${PKGDIR}/usr/local
+ PREFIX = /usr/local
To:
PREFIX = /home/your_home/somewhere
% cd ../extensions/
Edit the first line of Makefile
From:
- PREFIX = ${PKGDIR}/usr/local
+ PREFIX = /usr/local
To:
PREFIX = /home/your_home/somewhere
% make clean
-#! /bin/sh
-
+#! /bin/bash
er=0;
myself=`dirname "$0"`/`basename "$0"`; export myself
-version="v6.857b (2011/05/30)"; export version
+version="v7.310 (2017/Mar/17)"; export version
LANG=C; export LANG
os=`uname`
progname=`basename "$0"`
if [ `echo $os | grep -i cygwin` ]; then
os="cygwin"
+elif [ `echo $os | grep -i mingw` ]; then
+ os="mingw"
elif [ `echo $os | grep -i darwin` ]; then
os="darwin"
elif [ `echo $os | grep -i sunos` ]; then
fi
fi
-if [ ! -x "$prefix/tbfast" ]; then
+if [ -x "$prefix/version" ]; then
+ versionbin=`"$prefix/version"` # for cygwin 2.7
+ else
+ versionbin="0.000"
+fi
+
+if ! expr "$version" : v"$versionbin" > /dev/null ; then
+ echo "" 1>&2
+ echo "v$versionbin != $version" 1>&2
+ echo "" 1>&2
+ echo "There is a problem in the configuration of your shell." 1>&2
+ echo "Check the MAFFT_BINARIES environmental variable by" 1>&2
+ echo "$ echo \$MAFFT_BINARIES" 1>&2
+ echo "" 1>&2
+ echo "This variable must be *unset*, unless you have installed MAFFT" 1>&2
+ echo "with a special configuration. To unset this variable, type" 1>&2
+ echo "$ unset MAFFT_BINARIES" 1>&2
+ echo "or" 1>&2
+ echo "% unsetenv MAFFT_BINARIES" 1>&2
+ echo "Then retry" 1>&2
+ echo "$ mafft input > output" 1>&2
+ echo "" 1>&2
+ echo "To keep this change permanently, edit setting files" 1>&2
+ echo "(.bash_profile, .profile, .cshrc, etc) in your home directory" 1>&2
+ echo "to delete the MAFFT_BINARIES line." 1>&2
+ echo "On MacOSX, also edit or remove the .MacOSX/environment.plist file" 1>&2
+ echo "and then re-login (MacOSX 10.6) or reboot (MacOSX 10.7)." 1>&2
echo "" 1>&2
- echo "correctly installed?" 1>&2
- echo "mafft binaries have to be installed in \$MAFFT_BINARIES" 1>&2
- echo "or the $prefix directory". 1>&2
+ echo "Please send a problem report to kazutaka.katoh@aist.go.jp," 1>&2
+ echo "if this problem remains." 1>&2
echo "" 1>&2
exit 1
er=1
defaultlgop="-2.00"
defaultfft=1
defaultrough=0
-defaultdistance="sixtuples"
+defaultdistance="ktuples"
#defaultdistance="local"
defaultweighti="2.7"
defaultweightr="0.0"
defaultweightm="1.0"
+defaultdafs=0
defaultmccaskill=0
defaultcontrafold=0
defaultalgopt=" "
defaultweightm="2.0"
defaultmccaskill=1
defaultcontrafold=0
+ defaultdafs=0
defaultalgopt=" -A "
defaultalgoptit=" -AB " ## chui
defaultaof="0.0"
defaultsbstmodel=" -b 62 "
defaultkappa=" "
- defaultfmodel=" -a "
+ defaultfmodel=" " # 2013/06/18
elif [ $progname = "qinsi" -o $progname = "mafft-qinsi" ]; then
defaultfft=1
defaultcycle=1
defaultweightm="2.0"
defaultmccaskill=1
defaultcontrafold=0
+ defaultdafs=0
defaultalgopt=" -A "
defaultalgoptit=" -AB " ## chui
defaultaof="0.0"
defaultsbstmodel=" -b 62 "
defaultkappa=" "
- defaultfmodel=" -a "
+ defaultfmodel=" " # 2013/06/18
elif [ $progname = "linsi" -o $progname = "mafft-linsi" ]; then
defaultfft=0
defaultcycle=1
elif [ $progname = "fftns" -o $progname = "mafft-fftns" ]; then
defaultfft=1
defaultcycle=2
- defaultdistance="sixtuples"
+ defaultdistance="ktuples"
elif [ $progname = "fftnsi" -o $progname = "mafft-fftnsi" ]; then
defaultfft=1
defaultcycle=2
defaultiterate=2
- defaultdistance="sixtuples"
+ defaultdistance="ktuples"
elif [ $progname = "nwns" -o $progname = "mafft-nwns" ]; then
defaultfft=0
defaultcycle=2
- defaultdistance="sixtuples"
+ defaultdistance="ktuples"
elif [ $progname = "nwnsi" -o $progname = "mafft-nwnsi" ]; then
defaultfft=0
defaultcycle=2
defaultiterate=2
- defaultdistance="sixtuples"
+ defaultdistance="ktuples"
fi
outputfile=""
-namelength=15
+namelength=-1
anysymbol=0
parallelizationstrategy="BAATARI2"
kappa=$defaultkappa
sbstmodel=$defaultsbstmodel
fmodel=$defaultfmodel
+nmodel=" "
gop=$defaultgop
+gopdist=$defaultgop
aof=$defaultaof
cycle=$defaultcycle
iterate=$defaultiterate
weightr=$defaultweightr
weightm=$defaultweightm
rnaalifold=0
+dafs=$defaultdafs
mccaskill=$defaultmccaskill
contrafold=$defaultcontrafold
-quiet=0
+progressfile="/dev/stderr"
debug=0
sw=0
algopt=$defaultalgopt
algoptit=$defaultalgoptit
+#algspecified=0
+pairspecified=0
scorecalcopt=" "
coreout=0
corethr="0.5"
corewin="100"
coreext=" "
outputformat="pir"
+f2clext="-N"
outorder="input"
seed="x"
seedtable="x"
auto=0
groupsize=-1
partsize=50
-partdist="sixtuples"
+partdist="ktuples"
partorderopt=" -x "
treeout=0
distout=0
treeinopt=" "
seedfiles="/dev/null"
seedtablefile="/dev/null"
+pdblist="/dev/null"
+ownlist="/dev/null"
+strdir="$PWD"
aamatrix="/dev/null"
treeinfile="/dev/null"
rnascoremtx=" "
laraparams="/dev/null"
foldalignopt=" "
-treealg=" -X "
+treealg=" -X 0.1 "
+sueff="1.0"
scoreoutarg=" "
numthreads=0
+numthreadsit=-1
+numthreadstb=-1
randomseed=0
addfile="/dev/null"
addarg0=" "
+addarg=" "
+addsinglearg=" "
+add2ndhalfarg=" "
+mapoutfile="/dev/null"
+fragment=0
+legacygapopt=" "
+mergetable="/dev/null"
+mergearg=" "
+seedoffset=0
outnum=" "
+last_e=5000
+last_m=3
+last_subopt=" "
+last_once=" "
+adjustdirection=0
+tuplesize=6
+termgapopt=" -O "
+#termgapopt=" " # gap/gap ga kakenai node
+similarityoffset="0.0"
+unalignlevel="0.0"
+unalignspecified=0
+spfactor="100.0"
+shiftpenaltyspecified=0
+opdistspecified=0
+allowshift=0
+enrich=0
+enrichseq=0
+enrichstr=0
+seektarget=""
+fixthreshold="0.0"
+bunkatsuopt=" "
+npickup=0
+minimumweight="0.00001" # 2016/Mar
+usenaivepairscore=" "
+oldgenafparam=0
+sprigorous=0
+pileuporshuffle="l"
+initialramusage="20GB"
+focusarg=" "
if [ $# -gt 0 ]; then
+ if [ "$1" = "--version" ]; then
+ echo "$version" 1>&2
+ exit 0;
+ elif [ "$1" = "--help" -o "$1" = "--info" ]; then
+ shift
+ er=1;
+ fi
while [ $# -gt 1 ];
do
if [ "$1" = "--auto" ]; then
partorderopt=" -x "
elif [ "$1" = "--unweight" ]; then
weightopt=" -u "
+ elif [ "$1" = "--termgappenalty" ]; then
+ termgapopt=" "
+ elif [ "$1" = "--alga" ]; then
+ algopt=" "
+ algoptit=" "
+# algspecified=1
elif [ "$1" = "--algq" ]; then
algopt=" -Q "
- algoptit=" -QB "
+ algoptit=" "
+ echo "" 1>&2
+ echo "--algq is no longer supported!" 1>&2
+ echo "" 1>&2
+ exit 1;
+# algspecified=1
elif [ "$1" = "--namelength" ]; then
shift
namelength=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify the length of name in clustal format output!" 1>&2
+ exit
+ fi
elif [ "$1" = "--groupsize" ]; then
shift
groupsize=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify groupsize!" 1>&2
+ exit
+ fi
elif [ "$1" = "--partsize" ]; then
shift
partsize=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify partsize!" 1>&2
+ exit
+ fi
elif [ "$1" = "--parttree" ]; then
distance="parttree"
- partdist="sixtuples"
+ partdist="ktuples"
elif [ "$1" = "--dpparttree" ]; then
distance="parttree"
partdist="localalign"
distout=1
elif [ "$1" = "--fastswpair" ]; then
distance="fasta"
+ pairspecified=1
sw=1
elif [ "$1" = "--fastapair" ]; then
distance="fasta"
+ pairspecified=1
sw=0
elif [ "$1" = "--averagelinkage" ]; then
- treealg=" -E "
+ treealg=" -X 1.0 "
+ sueff="1.0"
elif [ "$1" = "--minimumlinkage" ]; then
- treealg=" -q "
+ treealg=" -X 0.0 "
+ sueff="0.0"
+ elif [ "$1" = "--mixedlinkage" ]; then
+ shift
+ sueff="$1"
+ treealg=" -X $1"
elif [ "$1" = "--noscore" ]; then
scorecalcopt=" -Z "
+ elif [ "$1" = "--6mermultipair" ]; then
+ distance="ktuplesmulti"
+ tuplesize=6
+ pairspecified=1
+ elif [ "$1" = "--10mermultipair" ]; then
+ distance="ktuplesmulti"
+ tuplesize=10
+ pairspecified=1
elif [ "$1" = "--6merpair" ]; then
- distance="sixtuples"
+ distance="ktuples"
+ tuplesize=6
+ pairspecified=1
+ elif [ "$1" = "--10merpair" ]; then
+ distance="ktuples"
+ tuplesize=10
+ pairspecified=1
elif [ "$1" = "--blastpair" ]; then
distance="blast"
+ pairspecified=1
+ elif [ "$1" = "--lastmultipair" ]; then
+ distance="lastmulti"
+ pairspecified=1
elif [ "$1" = "--globalpair" ]; then
distance="global"
+ pairspecified=1
+ elif [ "$1" = "--shortlongpair" ]; then
+ distance="local"
+ usenaivepairscore="-Z"
+ laof=0.0 # addfull no tokini tsukawareru.
+ lexp=0.0 # addfull no tokini tsukawareru.
+ pgaof=0.0 # local nara iranai
+ pgexp=0.0 # local nara iranai
+ pairspecified=1
+ elif [ "$1" = "--longshortpair" ]; then
+ distance="local"
+ usenaivepairscore="-Z"
+ laof=0.0 # addfull no tokini tsukawareru.
+ lexp=0.0 # addfull no tokini tsukawareru.
+ pgaof=0.0 # local nara iranai
+ pgexp=0.0 # local nara iranai
+ pairspecified=1
elif [ "$1" = "--localpair" ]; then
distance="local"
+ pairspecified=1
+ elif [ "$1" = "--lastpair" ]; then
+ distance="last"
+ pairspecified=1
+ elif [ "$1" = "--multipair" ]; then
+ distance="multi"
+ pairspecified=1
+ elif [ "$1" = "--hybridpair" ]; then
+ distance="hybrid"
+ pairspecified=1
elif [ "$1" = "--scarnapair" ]; then
distance="scarna"
+ pairspecified=1
+ elif [ "$1" = "--dafspair" ]; then
+ distance="dafs"
+ pairspecified=1
elif [ "$1" = "--larapair" ]; then
distance="lara"
+ pairspecified=1
elif [ "$1" = "--slarapair" ]; then
distance="slara"
+ pairspecified=1
elif [ "$1" = "--foldalignpair" ]; then
distance="foldalignlocal"
+ pairspecified=1
elif [ "$1" = "--foldalignlocalpair" ]; then
distance="foldalignlocal"
+ pairspecified=1
elif [ "$1" = "--foldalignglobalpair" ]; then
distance="foldalignglobal"
+ pairspecified=1
elif [ "$1" = "--globalgenafpair" ]; then
distance="globalgenaf"
+ pairspecified=1
+ echo "" 1>&2
+ echo "--globalgenaf is no longer supported!" 1>&2
+ echo "" 1>&2
+ exit 1;
elif [ "$1" = "--localgenafpair" ]; then
distance="localgenaf"
+ pairspecified=1
elif [ "$1" = "--genafpair" ]; then
distance="localgenaf"
+ pairspecified=1
+ elif [ "$1" = "--oldgenafpair" ]; then
+ distance="localgenaf"
+ pairspecified=1
+ oldgenafparam=1
elif [ "$1" = "--memsave" ]; then
memopt=" -M -B " # -B (bunkatsunashi no riyu ga omoidasenai)
elif [ "$1" = "--nomemsave" ]; then
elif [ "$1" = "--nofft" ]; then
fft=0
elif [ "$1" = "--quiet" ]; then
- quiet=1
+ if [ $os = "mingw" ]; then
+ progressfile="nul"
+ else
+ progressfile="/dev/null"
+ fi
elif [ "$1" = "--debug" ]; then
debug=1
elif [ "$1" = "--coreext" ]; then
coreext=" -c "
elif [ "$1" = "--core" ]; then
coreout=1
+ elif [ "$1" = "--adjustdirection" ]; then
+ adjustdirection=1
+ elif [ "$1" = "--adjustdirectionaccurately" ]; then
+ adjustdirection=2
+ elif [ "$1" = "--progress" ]; then
+ shift
+ progressfile="$1"
+ if ! ( expr "$progressfile" : "\/" > /dev/null || expr "$progressfile" : "[A-Za-z]\:" > /dev/null ) ; then
+ echo "Specify a progress file name with the absolute path!" 1>&2
+ exit
+ fi
elif [ "$1" = "--out" ]; then
shift
outputfile="$1"
elif [ "$1" = "--thread" ]; then
shift
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads. Or, use --thread -1" 1>&2
+ exit
+ fi
numthreads=`expr "$1" - 0`
+ elif [ "$1" = "--threadtb" ]; then
+ shift
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads for the iterative step!" 1>&2
+ exit
+ fi
+ numthreadstb=`expr "$1" - 0`
+ elif [ "$1" = "--threadit" ]; then
+ shift
+ if ! expr "$1" : "[0-9\-]" > /dev/null ; then
+ echo "Specify the number of threads for the iterative step!" 1>&2
+ exit
+ fi
+ numthreadsit=`expr "$1" - 0`
+ elif [ "$1" = "--last_subopt" ]; then
+ last_subopt="-S"
+ elif [ "$1" = "--last_once" ]; then
+ last_once="-U"
+ elif [ "$1" = "--last_m" ]; then
+ shift
+ last_m=`expr "$1" - 0`
+ elif [ "$1" = "--last_e" ]; then
+ shift
+ last_e=`expr "$1" - 0`
elif [ "$1" = "--randomseed" ]; then
shift
randomseed=`expr "$1" - 0`
elif [ "$1" = "--scoreout" ]; then
scoreoutarg="-S -B"
elif [ "$1" = "--outnum" ]; then
- scoreoutarg="-n"
+ outnum="-n"
+ elif [ "$1" = "--leavegappyregion" ]; then
+ legacygapopt="-L"
+ elif [ "$1" = "--legacygappenalty" ]; then
+ legacygapopt="-L"
+ elif [ "$1" = "--merge" ]; then
+ shift
+ mergetable="$1"
+ if [ ! -e "$mergetable" ]; then
+ echo "Cannot open $mergetable" 1>&2
+ echo "" 1>&2
+ exit
+ fi
elif [ "$1" = "--addprofile" ]; then
shift
addarg0="-I"
shift
addarg0="-K -I"
addfile="$1"
+ elif [ "$1" = "--addfragments" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=1
+ elif [ "$1" = "--addfull" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=-1
+ elif [ "$1" = "--addlong" ]; then
+ shift
+ addarg0="-K -I"
+ addfile="$1"
+ fragment=-2
+ elif [ "$1" = "--smoothing" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -p "
+ elif [ "$1" = "--keeplength" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -Y "
+ elif [ "$1" = "--mapout" ]; then
+ add2ndhalfarg=$add2ndhalfarg" -Z -Y "
+ elif [ "$1" = "--mapoutfile" ]; then
+ shift
+ add2ndhalfarg=$add2ndhalfarg" -Z -Y "
+ mapoutfile="$1"
elif [ "$1" = "--maxiterate" ]; then
shift
iterate=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify the number of iterations!" 1>&2
+ exit
+ fi
elif [ "$1" = "--retree" ]; then
shift
cycle=`expr "$1" - 0`
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify the number of tree rebuilding!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--text" ]; then
+ sbstmodel=" -b -2 -a "
+ f2clext="-E"
+ seqtype="-P"
+ fft=0
elif [ "$1" = "--aamatrix" ]; then
shift
sbstmodel=" -b -1 "
aamatrix="$1"
+ if [ ! -e "$aamatrix" ]; then
+ echo "Cannot open $aamatrix" 1>&2
+ echo "" 1>&2
+ exit
+ fi
elif [ "$1" = "--treein" ]; then
shift
treeinopt=" -U "
treein=1
treeinfile="$1"
+ if [ ! -e "$treeinfile" ]; then
+ echo "Cannot open $treeinfile" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--pileup" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="p"
+ elif [ "$1" = "--randomchain" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="s"
elif [ "$1" = "--topin" ]; then
shift
treeinopt=" -V "
echo "There was a bug in version < 6.530." 1>&2
echo "This bug has not yet been fixed." 1>&2
exit 1
+ elif [ "$1" = "--memsavetree" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="C"
+ elif [ "$1" = "--memsavetreex" ]; then
+ treeinopt=" -U "
+ treein=1
+ pileuporshuffle="c"
+ elif [ "$1" = "--initialramusage" ]; then
+ shift
+ treeinopt=" -U "
+ treein=1
+ initialramusage="$1"
+ pileuporshuffle="c"
elif [ "$1" = "--kappa" ]; then
shift
kappa=" -k $1 "
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify kappa value!" 1>&2
+ exit
+ fi
elif [ "$1" = "--fmodel" ]; then
fmodel=" -a "
+ elif [ "$1" = "--nwildcard" ]; then
+ nmodel=" -: "
+ elif [ "$1" = "--nzero" ]; then
+ nmodel=" "
elif [ "$1" = "--jtt" ]; then
shift
sbstmodel=" -j $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
elif [ "$1" = "--kimura" ]; then
shift
sbstmodel=" -j $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
elif [ "$1" = "--tm" ]; then
shift
sbstmodel=" -m $1"
+# if ! expr "$1" : "[0-9]" > /dev/null ; then
+# echo "Specify pam value!" 1>&2
+# exit
+# fi
elif [ "$1" = "--bl" ]; then
shift
sbstmodel=" -b $1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "blosum $1?" 1>&2
+ exit
+ fi
elif [ "$1" = "--weighti" ]; then
shift
weighti="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weighti value!" 1>&2
+ exit
+ fi
elif [ "$1" = "--weightr" ]; then
shift
weightr="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weightr value!" 1>&2
+ exit
+ fi
elif [ "$1" = "--weightm" ]; then
shift
weightm="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify weightm value!" 1>&2
+ exit
+ fi
elif [ "$1" = "--rnaalifold" ]; then
rnaalifold=1
elif [ "$1" = "--mccaskill" ]; then
mccaskill=1
contrafold=0
+ dafs=0
elif [ "$1" = "--contrafold" ]; then
mccaskill=0
contrafold=1
+ dafs=0
+ elif [ "$1" = "--dafs" ]; then
+ mccaskill=0
+ contrafold=0
+ dafs=1
elif [ "$1" = "--ribosum" ]; then
rnascoremtx=" -s "
elif [ "$1" = "--op" ]; then
shift
gop="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify op!" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--opdist" ]; then
+ shift
+ gopdist="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify opdist!" 1>&2
+ exit
+ fi
+ opdistspecified=1
+ elif [ "$1" = "--allowshift" ]; then
+ allowshift=1
+ elif [ "$1" = "--shiftpenalty" ]; then
+ shift
+ spfactor="$1"
+ if ! expr "$1" : "[0-9]" > /dev/null ; then
+ echo "Specify sf!" 1>&2
+ exit
+ fi
+ shiftpenaltyspecified=1
elif [ "$1" = "--ep" ]; then
shift
- aof="$1"
+# aof="$1"
+ tmpval="$1"
+ aof=`awk "BEGIN{ print -1.0 * \"$tmpval\"}"`
+ if ! expr "$aof" : "[0-9\-]" > /dev/null ; then
+ printf "\nSpecify a number for ep, like --ep 0.1\n" 1>&2
+ printf "'$1' cannot be interpreted as a number..\n\n" 1>&2
+ exit
+ fi
elif [ "$1" = "--rop" ]; then
shift
rgop="$1"
+# TODO: check later ("atode check") -- the value is not validated here yet
elif [ "$1" = "--rep" ]; then
shift
rgep="$1"
elif [ "$1" = "--corewin" ]; then
shift
corewin="$1"
+ elif [ "$1" = "--strdir" ]; then
+ shift
+ strdir="$1"
+ elif [ "$1" = "--pdbidlist" ]; then
+ shift
+ pdblist="$1"
+ if [ ! -e "$pdblist" ]; then
+ echo "Cannot open $pdblist" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--pdbfilelist" ]; then
+ shift
+ ownlist="$1"
+ if [ ! -e "$ownlist" ]; then
+ echo "Cannot open $ownlist" 1>&2
+ echo "" 1>&2
+ exit
+ fi
+ elif [ "$1" = "--enrich" ]; then
+ enrich=1
+ enrichseq=1
+ enrichstr=1
+ seektarget=""
+ elif [ "$1" = "--enrichseq" ]; then
+ enrich=1
+ enrichseq=1
+ enrichstr=0
+ seektarget="-seq"
+ elif [ "$1" = "--enrichstr" ]; then
+ enrich=1
+ enrichseq=0
+ enrichstr=1
+ seektarget="-str"
elif [ "$1" = "--seedtable" ]; then
shift
seedtable="y"
shift
seed="m"
seedfiles="$seedfiles $1"
+ elif [ "$1" = "--minimumweight" ]; then
+ shift
+ minimumweight="$1"
+ elif [ "$1" = "--similaritylevel" ]; then
+ shift
+ similarityoffset="$1"
+ elif [ "$1" = "--unalignlevel" ]; then
+ shift
+ unalignlevel="$1"
+ unalignspecified=1
+ elif [ "$1" = "--skipiterate" ]; then
+ shift
+ fixthreshold="$1"
+ elif [ "$1" = "--bunkatsunashi" ]; then
+ bunkatsuopt=" -B "
+ elif [ "$1" = "--sp" ]; then
+ sprigorous=1
+ elif [ "$1" = "--focus" ]; then
+ focusarg=" -= "
+ elif [ "$1" = "--sparsepickup" ]; then
+ shift
+ npickup="$1"
elif [ $progname = "fftns" -o $progname = "nwns" ]; then
if [ "$1" -gt 0 ]; then
cycle=`expr "$1" - 0`
else
echo "Unknown option: $1" 1>&2
er=1;
+# exit 1;
fi
shift
done;
+ echo "" 1>"$progressfile"
# TMPFILE=/tmp/$progname.$$
TMPFILE=`mktemp -dt $progname.XXXXXXXXXX`
echo "mktemp seems to be obsolete. Re-trying without -t" 1>&2
TMPFILE=`mktemp -d /tmp/$progname.XXXXXXXXXX`
fi
+
+# if [ $os = "cygwin" ]; then
+# TMPFILE=`cygpath -w $TMPFILE` unnecessary for cygwin2.7
+# fi
+
umask 077
# mkdir $TMPFILE || er=1
if [ $debug -eq 1 ]; then
- trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0
+# trap "tar cfvz debuginfo.tgz $TMPFILE; rm -rf $TMPFILE " 0 # does not work in msys
+ trap "tar cfv - $TMPFILE | gzip -c > debuginfo.tgz; rm -rf $TMPFILE " 0
else
- trap "rm -rf $TMPFILE " 0
+ trap "rm -rf $TMPFILE" 0
fi
if [ $# -eq 1 ]; then
if [ -r "$1" -o "$1" = - ]; then
printf '';
else
echo "$0": Cannot open "$addfile". 1>&2
+ echo "" 1>&2
exit 1;
fi
cat "$addfile" | tr "\r" "\n" | grep -v "^$" >> $TMPFILE/infile
cat "$addfile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_addfile
cat "$aamatrix" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_aamtx
+ cat "$mergetable" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_subalignmentstable
cat "$treeinfile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_guidetree
cat "$seedtablefile" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_seedtablefile
cat "$laraparams" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/_lara.params
+ cat "$pdblist" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/pdblist
+ cat "$ownlist" | tr "\r" "\n" | grep -v "^$" > $TMPFILE/ownlist
+
# echo $seedfiles
infilename="$1"
seedfilesintmp="/dev/null"
cat "$1" | tr "\r" "\n" > $TMPFILE/seed$#
else
echo "$0": Cannot open "$1". 1>&2
+ echo "" 1>&2
exit 1;
fi
seednseq=$seednseq" "`grep -c '^[>|=]' $TMPFILE/seed$#`
else
echo "$0": Cannot open "$1". 1>&2
+ echo "" 1>&2
er=1
# exit 1;
fi
else
- echo '$#'"=$#" 1>&2
+# echo '$#'"=$#" 1>&2
er=1
fi
- if [ $os != "linux" ]; then
- numthreads=0
+
+ if [ $numthreads -lt 0 ]; then
+ if [ $os = "linux" ]; then
+ nlogicalcore=`cat /proc/cpuinfo | grep "^processor" | uniq | wc -l`
+ ncoresinacpu=`cat /proc/cpuinfo | grep 'cpu cores' | uniq | awk '{print $4}'`
+ nphysicalcpu=`cat /proc/cpuinfo | grep 'physical id' | sort | uniq | wc -l`
+ if [ $nlogicalcore -eq 0 ]; then
+ echo "Cannot get the number of processors from /proc/cpuinfo" 1>>"$progressfile"
+ exit 1
+ fi
+ if [ ${#ncoresinacpu} -gt 0 -a $nphysicalcpu -gt 0 ]; then
+ numthreads=`expr $ncoresinacpu '*' $nphysicalcpu`
+# if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading
+# numthreads=`expr $numthreads '+' 1`
+# fi
+ else
+ numthreads=$nlogicalcore
+ fi
+ elif [ $os = "darwin" ]; then
+ numthreads=`sysctl -n hw.physicalcpu`
+ if [ -z $numthreads ]; then
+ echo "Cannot get the number of physical cores from sysctl" 1>>"$progressfile"
+ exit 1
+ fi
+# nlogicalcore=`sysctl -n hw.logicalcpu`
+# if [ $nlogicalcore -gt $numthreads ]; then # Hyperthreading
+# numthreads=`expr $numthreads '+' 1`
+# fi
+ elif [ $os = "mingw" -o $os = "cygwin" ]; then
+ numthreads=`wmic cpu get NumberOfCores | head -2 | tail -1 | awk '{print $1}'`
+ else
+ echo "Cannot count the number of physical cores." 1>>"$progressfile"
+ exit 1
+ fi
+ echo "OS = "$os 1>>"$progressfile"
+ echo "The number of physical cores = " $numthreads 1>>"$progressfile"
+ fi
+
+ if [ $numthreadstb -lt 0 ]; then
+ numthreadstb=$numthreads
+ fi
+
+ if [ $numthreadsit -lt 0 ]; then
+ if [ $numthreads -lt 11 ]; then
+ numthreadsit=$numthreads
+ else
+ numthreadsit=10
+ fi
fi
- if [ $numthreads -eq 0 -a $parallelizationstrategy = "BESTFIRST" ]; then
+ if [ $numthreadsit -eq 0 -a $parallelizationstrategy = "BESTFIRST" ]; then
echo 'Impossible' 1>&2;
exit 1;
fi
+ if [ "$addarg0" != " " ]; then
+ iterate=0 # 2013/03/23
+ "$prefix/countlen" < $TMPFILE/_addfile > $TMPFILE/addsize 2>>"$progressfile"
+ nadd=`awk '{print $1}' $TMPFILE/addsize`
+ if [ $nadd -eq "0" ]; then
+ echo Check $addfile 1>&2
+ exit 1;
+ fi
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ echo 'Impossible' 1>&2;
+ echo 'Use either ONE of --seed, --seedtable, --addprofile and --add.' 1>&2
+ exit 1;
+ fi
+ else
+ nadd="0"
+ fi
+
if [ $auto -eq 1 ]; then
- "$prefix/countlen" < $TMPFILE/infile > $TMPFILE/size
+ "$prefix/countlen" < $TMPFILE/infile > $TMPFILE/size 2>>"$progressfile"
nseq=`awk '{print $1}' $TMPFILE/size`
nlen=`awk '{print $3}' $TMPFILE/size`
- if [ $nlen -lt 2000 -a $nseq -lt 100 ]; then
+
+# --auto strategy table: the first matching row wins. nseq and nlen are read
+# from the countlen output just above; each row fixes the distance method,
+# the number of refinement iterations and/or the number of progressive cycles.
+ if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then
distance="local"
iterate=1000
+ cycle=1
+ elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then
+ distance="local"
+ iterate=2
+ cycle=1
elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then
- distance="sixtuples"
+ distance="ktuples"
iterate=2
- else
- distance="sixtuples"
+ cycle=2
+ elif [ $nseq -lt 50000 ]; then # changed from 10000 2014/Oct/4
+ distance="ktuples"
+ iterate=0
+ cycle=2
+ elif [ $nseq -lt 90000 ]; then # changed from 30000 2014/Oct/4
+ distance="ktuples"
iterate=0
+ cycle=1
+# From here on (nseq >= 90000) the parttree method is selected; partdist depends on nlen.
+ elif [ $nlen -lt 3000 ]; then
+ distance="parttree"
+ partdist="localalign"
+ algopt=" "
+ algoptit=" "
+# algspecified=1
+ cycle=1
+ else
+ distance="parttree"
+ partdist="ktuples"
+ algopt=" "
+ algoptit=" "
+# algspecified=1
+ cycle=1
fi
- if [ $quiet -eq 0 ]; then
- echo "nseq = " $nseq 1>&2
- echo "nlen = " $nlen 1>&2
- echo "distance = " $distance 1>&2
- echo "iterate = " $iterate 1>&2
+
+
+# if [ $nlen -lt 3000 -a $nseq -lt 100 ]; then
+# distance="local"
+# iterate=1000
+# cycle=1
+# elif [ $nlen -lt 1000 -a $nseq -lt 200 ]; then
+# distance="local"
+# iterate=2
+# cycle=1
+# elif [ $nlen -lt 10000 -a $nseq -lt 500 ]; then
+# distance="ktuples"
+# iterate=2
+# cycle=2
+# elif [ $nseq -lt 200000 ]; then
+# distance="ktuples"
+# iterate=0
+# treeinopt=" -U "
+# treein=1
+# pileuporshuffle="a"
+# elif [ $nlen -lt 3000 ]; then
+# distance="parttree"
+# partdist="localalign"
+# algopt=" "
+# algoptit=" "
+## algspecified=1
+# cycle=1
+# else
+# distance="parttree"
+# partdist="ktuples"
+# algopt=" "
+# algoptit=" "
+## algspecified=1
+# cycle=1
+# fi
+
+
+# In fragment mode ($fragment != 0) the pairwise stage is chosen from the number
+# of (existing x added) sequence pairs and the length, overriding the table above.
+ if [ $fragment -ne 0 ]; then
+ norg=`expr $nseq '-' $nadd`
+ npair=`expr $norg '*' $nadd`
+ echo "nadd = " $nadd 1>>"$progressfile"
+ echo "npair = " $npair 1>>"$progressfile"
+ echo "nseq = " $nseq 1>>"$progressfile"
+ echo "nlen = " $nlen 1>>"$progressfile"
+# length check!
+#
+ if [ $npair -gt 10000000 -o $nlen -gt 500000 ]; then # 2015/Jun
+ distance="ktuples"
+ echo "use ktuples, size=$tuplesize!" 1>>"$progressfile"
+ elif [ $npair -gt 3000000 -o $nlen -gt 100000 ]; then # 2015/Jun
+ distance="multi"
+ weighti="0.0"
+ echo "use multipair, weighti=0.0!" 1>>"$progressfile"
+ else
+ distance="multi"
+ echo "use multipair, weighti=$weighti!" 1>>"$progressfile"
+ fi
+# Mark the pairwise method as explicitly chosen so that later code does not reset it.
+ pairspecified=1
fi
fi
+# The --mixedlinkage argument ($sueff) must lie in [0.0, 1.0]; awk performs the
+# floating-point comparison and prints 1 when the value is out of range.
+ if [ `awk "BEGIN {print( 0.0+\"$sueff\" < 0.0 || 0.0+\"$sueff\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The argument of --mixedlinkage must be between 0.0 and 1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+
+# With allowshift set, unalignlevel and spfactor receive defaults unless they
+# were flagged as explicitly specified.
+ if [ $allowshift -eq 1 ]; then
+ if [ $unalignspecified -ne 1 ]; then
+ unalignlevel="0.8"
+ fi
+ if [ $shiftpenaltyspecified -ne 1 ]; then
+ spfactor="2.00"
+ fi
+ fi
+
+# Unless flagged as specified, gopdist follows gop.
+ if [ $opdistspecified -ne 1 ]; then
+ gopdist=$gop
+ fi
+
+# Entered when unalignlevel is not the literal string "0.0" or spfactor is below 100.0.
+# The checks below then only let distance="global" with weighti >= 1.0 and
+# fragment = 0 through; everything else stops with a message in $progressfile.
+ if [ $unalignlevel != "0.0" -o `awk "BEGIN {print( 0.0+\"$spfactor\" < 100.0 )}"` -gt 0 ]; then
+ nmodel=" -: "
+ termgapopt=" "
+ if [ $distance = "localgenaf" ]; then
+ printf "\n%s\n" "The combination of --allowshift and --genafpair (E-INS-i/-1) is not supported." 1>>"$progressfile"
+ printf "%s\n" "Instead, please try --allowshift --globalpair (G-INS-i/-1 in the web version)," 1>>"$progressfile"
+ printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i/-1), too." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $distance != "global" -o `awk "BEGIN {print( 0.0+\"$weighti\" < 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is supported only with the --globalpair option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $fragment -ne 0 ]; then
+ printf "\n%s\n\n" "At present, --unalignlevel # or --allowshift is not supported with the --addfragments option." 1>>"$progressfile"
+ exit 1;
+ fi
+ fi
+
+# Reject shift penalty factors below 1.0. A value of exactly 1.0 passes this
+# test, so the message states the bound that is actually enforced.
+ if [ `awk "BEGIN {print( 0.0+\"$spfactor\" < 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n" "shiftpenalty must be >= 1." 1>>"$progressfile"
+ exit 1;
+ fi
+
+# fixthreshold must not be negative.
+ if [ `awk "BEGIN {print( 0.0+\"$fixthreshold\" < 0.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The 'fix' parameter must be >= 0.0" 1>>"$progressfile"
+ exit 1;
+ fi
+
+# unalignlevel must lie in [0.0, 1.0].
+ if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" < 0.0 || 0.0+\"$unalignlevel\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "The 'unalignlevel' parameter must be between 0.0 and 1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+# A positive unalignlevel zeroes the offset/extension parameters listed below
+# and is refused for fragment mode, parttree and localgenaf distances.
+ if [ `awk "BEGIN {print( 0.0+\"$unalignlevel\" > 0.0 )}"` -gt 0 ]; then
+ laof="0"
+ lexp="0"
+ pgaof="0"
+ pgexp="0"
+ LEXP="0"
+ GEXP="0"
+ termgapopt=" "
+# if [ $auto -eq 1 -o $fragment -ne 0 -o $iterate -gt 0 ]; then
+ if [ $fragment -ne 0 ]; then
+ printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported with the --addfragments option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $distance = "parttree" ]; then
+ printf "\n%s\n\n" "At present, the 'unalignlevel > 0' mode is not supported in the (dp)parttree option." 1>>"$progressfile"
+ exit 1;
+ fi
+ if [ $distance = "localgenaf" ]; then
+ printf "\n%s\n" "The --genafpair is not supported in the 'unalignlevel > 0' mode." 1>>"$progressfile"
+ printf "%s\n" "Instead, please try --unalignlevel xx --globalpair," 1>>"$progressfile"
+ printf "%s\n\n" "which covers the situation for --genafpair (E-INS-i), too." 1>>"$progressfile"
+ exit 1;
+ fi
+# if [ $distance != "ktuples" -a `awk "BEGIN {print( 0.0+\"$weighti\" > 0.0 )}"` -gt 0 -a $iterate -gt 0 ]; then
+# printf "\n%s\n\n" "Please add --weighti 0.0, for now." 1>>"$progressfile"
+# exit 1;
+# fi
+ fi
+
+# similarityoffset and unalignlevel are mutually exclusive (both non-zero is an error).
+ if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" != 0.0 && 0.0+\"$unalignlevel\" != 0.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "Do not simultaneously specify --similaritylevel and --unalignlevel" 1>>"$progressfile"
+ exit 1;
+ fi
+
+# similarityoffset must lie in [-1.0, +1.0].
+ if [ `awk "BEGIN {print( 0.0+\"$similarityoffset\" < -1.0 || 0.0+\"$similarityoffset\" > 1.0 )}"` -gt 0 ]; then
+ printf "\n%s\n\n" "Similarity must be between -1.0 and +1.0" 1>>"$progressfile"
+ exit 1;
+ fi
+# Add the offset to the three offset parameters (aof, laof, pgaof); awk does the float addition.
+ aof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $aof}"`
+ laof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $laof}"`
+ pgaof=`awk "BEGIN{print 0.0 + \"$similarityoffset\" + $pgaof}"`
+
+
if [ $parallelizationstrategy = "BESTFIRST" -o $parallelizationstrategy = "BAATARI0" ]; then
iteratelimit=254
else
rnaopt=" -e $rgep -o $rgop -c $weightm -r $weightr -R $rnascoremtx "
# rnaoptit=" -o $rgop -BT -c $weightm -r $weightr -R "
rnaoptit=" -o $rgop -F -c $weightm -r $weightr -R "
- elif [ $mccaskill -eq 1 -o $contrafold -eq 1 ]; then
+ elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $contrafold -eq 1 ]; then
rnaopt=" -o $rgop -c $weightm -r $weightr "
# rnaoptit=" -e $rgep -o $rgop -BT -c $weightm -r $weightr $rnascoremtx "
rnaoptit=" -e $rgep -o $rgop -F -c $weightm -r $weightr $rnascoremtx "
rnaoptit=" -F "
fi
- model="$sbstmodel $kappa $fmodel"
+# if [ $algspecified -eq 0 ]; then
+# if [ $distance = "parttree" ]; then
+# algopt=" -Q "
+# algoptit=" "
+# else
+# algopt=" "
+# algoptit=" "
+# fi
+# fi
+
+# With sprigorous set (the messages below call it --sp): switch the algorithm
+# option to " -@ ", clear termgapopt, disable FFT and set memopt to " -N ".
+# Iterative refinement is only accepted when numthreadsit is 0.
+ if [ $sprigorous -eq 1 ]; then
+ algopt=" -@ "
+ if [ $iterate -gt 0 ]; then
+ if [ $numthreadsit -eq 0 ]; then
+ algoptit=" -@ -B -Z -z 1000 "
+ else
+ echo "" 1>>"$progressfile"
+ echo "At present, the combination of --sp and iterative refinement is supported only in a single thread." 1>>"$progressfile"
+ echo "Please try \"--thread -1 --threadit 0\", which runs the iterative refinement calculation on a single thread." 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ exit 1;
+# algoptit=" -@ -B -z 1000 "
+ fi
+ fi
+ termgapopt=" "
+ fft=0
+ memopt=" -N "
+ fi
+
+ model="$sbstmodel $kappa $fmodel $nmodel"
if [ $er -eq 1 ]; then
echo "------------------------------------------------------------------------------" 1>&2
# echo "" 1>&2
# echo " Usage: `basename $0` [options] inputfile > outputfile" 1>&2
echo " http://mafft.cbrc.jp/alignment/software/" 1>&2
- echo " NAR 30:3059-3066 (2002), Briefings in Bioinformatics 9:286-298 (2008)" 1>&2
+ echo " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)" 1>&2
# echo "------------------------------------------------------------------------------" 1>&2
# echo " % mafft in > out" 1>&2
echo "------------------------------------------------------------------------------" 1>&2
echo "--clustalout : Output: clustal format, default: fasta" 1>&2
echo "--reorder : Outorder: aligned, default: input order" 1>&2
echo "--quiet : Do not report progress" 1>&2
- echo "--thread # : Number of threads. (# must be <= number of physical cores - 1)" 1>&2
+ echo "--thread # : Number of threads (if unsure, --thread -1)" 1>&2
# echo "" 1>&2
# echo " % mafft --maxiterate 1000 --localpair in > out (L-INS-i)" 1>&2
# echo " most accurate in many cases, assumes only one alignable domain" 1>&2
exit 1
fi
fi
+# The "last" and "lastmulti" distances need executable lastal and lastdb in
+# $prefix; if either is missing, print installation pointers to stderr and stop.
+ if [ $distance = "last" -o $distance = "lastmulti" ]; then
+ if ! [ -x "$prefix/lastal" -a -x "$prefix/lastdb" ]; then
+ printf '%s\n' \
+ "" \
+ "== Install LAST ============================================================" \
+ "LAST (Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487) is required." \
+ "http://last.cbrc.jp/" \
+ "http://mafft.cbrc.jp/alignment/software/xxxxxxx.html " \
+ "============================================================================" \
+ "" >&2
+ exit 1
+ fi
+ fi
if [ $distance = "lara" -o $distance = "slara" ]; then
if [ ! -x "$prefix/mafft_lara" ]; then
echo "" 1>&2
exit 1
fi
fi
- if [ $distance = "scarna" ]; then
+# mxscarnamod must be executable in $prefix both for the scarna distance and
+# when $mccaskill is 1; otherwise print build instructions to stderr and stop.
+ if [ $distance = "scarna" -o $mccaskill -eq 1 ]; then
if [ ! -x "$prefix/mxscarnamod" ]; then
echo "" 1>&2
echo "== Install MXSCARNA ======================================================" 1>&2
echo "MXSCARNA (Tabei et al. BMC Bioinformatics 2008 9:33) is required." 1>&2
echo "Please 'make' at the 'extensions' directory of the MAFFT source package," 1>&2
echo "which contains the modified version of MXSCARNA." 1>&2
- echo "http://align.bmr.kyushu-u.ac.jp/mafft/software/source.html " 1>&2
+ echo "http://mafft.cbrc.jp/alignment/software/source.html " 1>&2
echo "==========================================================================" 1>&2
echo "" 1>&2
exit 1
fi
fi
- if [ $mccaskill -eq 1 ]; then
- if [ ! -x "$prefix/mxscarnamod" ]; then
+ if [ $distance = "dafs" -o $dafs -eq 1 ]; then
+ if [ ! -x "$prefix/dafs" ]; then
echo "" 1>&2
- echo "== Install MXSCARNA ======================================================" 1>&2
- echo "MXSCARNA (Tabei et al. BMC Bioinformatics 2008 9:33) is required." 1>&2
- echo "Please 'make' at the 'extensions' directory of the MAFFT source package," 1>&2
- echo "which contains the modified version of MXSCARNA." 1>&2
- echo "http://align.bmr.kyushu-u.ac.jp/mafft/software/source.html " 1>&2
+ echo "== Install DAFS===========================================================" 1>&2
+ echo "DAFS (Sato et al. Journal 2012 issue:page) is required." 1>&2
+ echo "http://www.ncrna.org/ " 1>&2
echo "==========================================================================" 1>&2
echo "" 1>&2
exit 1
if [ $cycle -eq 0 ]; then
treeoutopt="-t -T"
iterate=0
- if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
- distance="distonly"
- fi
+ weighti="0.0" # 2016Jul31, tbfast.c kara idou
+# if [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o $distance = "globalgenaf" ]; then # 2012/04, localpair --> local alignment distance
+# if [ $distance = "global" ]; then
+# distance="distonly"
+# fi
if [ $treeout -eq 1 ]; then
parttreeoutopt="-t"
groupsize=1
fi
if [ $distout -eq 1 ]; then
distoutopt="-y -T"
+ if [ $treeout -eq 0 ]; then
+ treeoutopt=""
+ fi
fi
else
if [ $treeout -eq 1 ]; then
cycle=3
fi
- if [ $nseq -gt 4000 -a $iterate -gt 1 ]; then
+ if [ $nseq -gt 60000 -a $iterate -gt 1 ]; then # 2014/Oct/22, test
echo "Too many sequences to perform iterative refinement!" 1>&2
echo "Please use a progressive method." 1>&2
exit 1
fi
+# The multi-pair distances are only accepted in fragment mode.
+ case "$distance" in
+ lastmulti|multi)
+ if [ $fragment -eq 0 ]; then
+ echo 'Specify --addfragments too' >&2
+ exit 1
+ fi
+ ;;
+ esac
+
+# In fragment mode the distance defaults to "multi" when no pairwise method
+# was chosen, and must be one of the methods named in the message below.
+ if [ $fragment -ne 0 ]; then
+ [ $pairspecified -eq 0 ] && distance="multi"
+ case "$distance" in
+ multi|hybrid|lastmulti|local|last|ktuples|ktuplesmulti)
+ ;;
+ *)
+ echo 'Specify --multipair, --lastmultipair, --lastpair, --localpair, --6merpair, --6mermultipair or --hybridpair' >&2
+ exit 1
+ ;;
+ esac
+ fi
+
+# The " -M -B " memory option is only accepted with the ktuples distance.
+ if [ "$memopt" = " -M -B " ] && [ "$distance" != "ktuples" ]; then
+ echo "Impossible" >&2
+ exit 1
+ fi
+
+# parttree cannot be combined with merge, add(fragments), seeds or more than
+# one refinement iteration. Output order is forced to "input", and the split
+# option follows $partdist.
+ if [ $distance = "parttree" ]; then
+ if [ $mergetable != "/dev/null" ]; then
+ echo "The combination of (dp)parttree and merge is Impossible. " 1>&2
+ exit 1
+ fi
+ if [ $addfile != "/dev/null" ]; then
+ echo "The combination of (dp)parttree and add(fragments) is Impossible. " 1>&2
+ exit 1
+ fi
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ if [ $iterate -gt 1 ]; then
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+# NOTE(review): this conditional is redundant, the next statement sets outorder="input" unconditionally.
+ if [ $outorder = "aligned" ]; then
+ outorder="input"
+ fi
+ outorder="input" # partorder takes effect (translated from Japanese)
+ if [ $partdist = "localalign" ]; then
+ splitopt=" -U " # -U -l -> fast
+ cycle=1
+ elif [ $partdist = "fasta" ]; then
+ splitopt=" -S "
+ cycle=1
+ else
+ splitopt=" "
+ fi
+ fi
- if [ $distance = "sixtuples" -a \( $seed = "x" -a $seedtable = "x" \) ]; then
- localparam=" "
- elif [ $distance = "sixtuples" -a \( $seed != "x" -o $seedtable != "x" \) ]; then
+ if [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed = "x" -a $seedtable = "x" -a $ownlist = "/dev/null" -a $pdblist = "/dev/null" -a $enrichstr -eq 0 \) ]; then
+ localparam=""
+ weighti="0.0"
+ elif [ \( $distance = "ktuples" -o $distance = "ktuplesmulti" \) -a \( $seed != "x" -o $seedtable != "x" -o $ownlist != "/dev/null" -o $pdblist != "/dev/null" -o $enrichstr -eq 1 \) ]; then
if [ $cycle -lt 2 ]; then
- cycle=2 # nazeda
+ cycle=2 # disttbfast ha seed hi-taiou # chuui 2014Aug21
fi
if [ $iterate -lt 2 ]; then
echo "############################################################################" 1>&2
fi
localparam="-l "$weighti
elif [ $distance = "parttree" ]; then
- localparam=" "
+ localparam=""
+ weighti="0.0"
if [ $groupsize -gt -1 ]; then
cycle=1
fi
else
- localparam=" -l "$weighti
+ localparam="-B -l "$weighti # weighti=0 demo bunkatsu nashi
if [ $cycle -gt 1 ]; then # 09/01/08
cycle=1
fi
fi
+
if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
aof="0.000"
- fi
-
- if [ "$memopt" = " -M -B " -a "$distance" != "sixtuples" ]; then
- echo "Impossible" 1>&2
- exit 1
- fi
-#exit
-
- if [ $distance = "parttree" ]; then
- if [ $seed != "x" -o $seedtable != "x" ]; then
- echo "Impossible" 1>&2
- exit 1
- fi
- if [ $iterate -gt 1 ]; then
- echo "Impossible" 1>&2
- exit 1
- fi
- if [ $outorder = "aligned" ]; then
- outorder="input"
- fi
- outorder="input" # partorder ga kiku
- if [ $partdist = "localalign" ]; then
- splitopt=" -L " # -L -l -> fast
- elif [ $partdist = "fasta" ]; then
- splitopt=" -S "
- else
- splitopt=" "
+ if [ $oldgenafparam -ne 1 ]; then
+ laof="0.0"
+ lexp="0.0"
+# LEXP="0.0" # default = 0.0
+ usenaivepairscore="-Z"
fi
fi
echo 'Use either one of seedtable and seed. Not both.' 1>&2
exit 1
fi
-# if [ $seedtable != "x" -a $anysymbol -gt 0 ]; then
-# echo 'The combination of --seedtable and --anysymbol is not supported.' 1>&2
-# exit 1
-# fi
+# f2clext = "-E" (the --text mode named in the messages) cannot be combined
+# with anysymbol > 0 or with a user-supplied aamatrix.
+ if [ $f2clext = "-E" ] && [ $anysymbol -gt 0 ]; then
+ printf '\n%s\n\n' 'At present, the combination of --text and ( --anysymbol or --preservecase ) is impossible.' >&2
+ exit 1
+ fi
+
+ if [ $f2clext = "-E" ] && [ $aamatrix != "/dev/null" ]; then
+ printf '\n%s\n\n' 'At present, the combination of --text and (--aamatrix) is impossible.' >&2
+ exit 1
+ fi
if [ $treein -eq 1 ]; then
# if [ $iterate -gt 0 ]; then
# echo 'Not supported yet.' 1>&2
# exit 1
# fi
- cycle=1
+# When no non-empty guide tree file exists, write a one-line keyword file
+# selected by $pileuporshuffle instead; this is only allowed for ktuples.
+ if [ ! -s $TMPFILE/_guidetree ]; then
+ if [ $distance != "ktuples" ]; then
+ echo "Not supported yet" 1>>"$progressfile"
+ exit 1
+ fi
+ if [ $pileuporshuffle = "p" ]; then
+ echo "pileup" > $TMPFILE/_guidetree
+# weightopt=" -u " -> disttbfast.c?
+# numthreadstb=0 -> disttbfast.c
+ cycle=1 # specified for disttbfast (translated from Japanese)
+ elif [ $pileuporshuffle = "s" ]; then
+ echo "shuffle $randomseed" > $TMPFILE/_guidetree
+# numthreadstb=0 -> disttbfast.c
+# weightopt=" -u " -> disttbfast.c?
+ cycle=1 # also specified in disttbfast.c (translated from Japanese)
+ elif [ $pileuporshuffle = "C" ]; then
+ echo "very compact" > $TMPFILE/_guidetree
+ elif [ $pileuporshuffle = "c" ]; then
+ echo "compact " "$initialramusage" > $TMPFILE/_guidetree
+ elif [ $pileuporshuffle = "a" ]; then
+ echo "auto $randomseed 200" > $TMPFILE/_guidetree
+ fi
+ fi
fi
- if [ "$addarg0" != " " ]; then
- "$prefix/countlen" < $TMPFILE/_addfile > $TMPFILE/addsize
- nadd=`awk '{print $1}' $TMPFILE/addsize`
- if [ $nadd -eq "0" ]; then
- echo Check $addfile 1>&2
- exit 1;
+# When sequences are being added (nadd > 0), build the add arguments according
+# to $fragment (1, -1, -2 or anything else) and disable iterative refinement.
+ if [ $nadd -gt "0" ]; then
+ if [ $fragment -eq "1" ]; then
+ addarg="$addarg0 $nadd -g -0.01"
+ addsinglearg=""
+ cycle=1 # caution 2014Aug25
+ elif [ $fragment -eq "-1" ]; then
+ addarg="$addarg0 $nadd"
+ addsinglearg="-V" # allowlongadds, 2014/04/02
+ cycle=1 # caution 2014Aug25
+ elif [ $fragment -eq "-2" ]; then
+ addarg="$addarg0 $nadd"
+ addsinglearg="-V" # allowlongadds + smoothing
+ add2ndhalfarg=$add2ndhalfarg" -p "
+ cycle=1 # caution 2014Aug25
+ usenaivepairscore="-Z" # 2015Jun01
+ laof=0.0 # 2015Jun01
+ lexp=0.0 # 2015Jun01
+ else
+ addarg="$addarg0 $nadd"
+ addsinglearg=""
fi
- addarg="$addarg0 $nadd"
- cycle=1
+
+# cycle=1 # caution 2014Aug19
iterate=0
- if [ $seed != "x" -o $seedtable != "x" ]; then
- echo 'Impossible' 1>&2;
- echo 'Use either ONE of --seed, --seedtable, --addprofile and --add.' 1>&2
- exit 1;
+# treealg=" -q " ## 2012/01/24 ## removed 2012/02/06
+ fi
+
+
+# Decide which program receives the cycle count: with an empty $localparam, no
+# fragment mode and a non-parttree distance it goes to disttbfast, else to tbfast.
+ if [ -z "$localparam" -a $fragment -eq 0 -a $distance != "parttree" ]; then
+# echo "use disttbfast"
+# echo cycle = $cycle
+ cycletbfast=1 # do not run tbfast
+ cycledisttbfast=$cycle # pass the cycle count to disttbfast via -E
+ if [ $cycledisttbfast -eq 0 ]; then # used by --treeout
+ cycledisttbfast=1
fi
+ else
+# echo "use tbfast"
+# echo cycle = $cycle
+ cycletbfast=$cycle # run if 1 or more
+ cycledisttbfast=1 # disttbfast runs only once
fi
- if [ $mccaskill -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
- if [ $distance = "sixtuples" ]; then
+# echo localparam=
+# echo $localparam
+# echo cycletbfast=
+# echo $cycletbfast
+# echo cycledisttbfast=
+# echo $cycledisttbfast
+
+#exit
+
+# Direction adjustment cannot be combined with --seed.
+ if [ $adjustdirection -gt 0 -a $seed != "x" ]; then
+ echo '' 1>&2
+ echo 'The combination of --adjustdirection(accurately) and --seed is not supported.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
+
+
+# When any of mccaskill/dafs/rnaalifold/contrafold is 1, neither the ktuples
+# distance nor the --text mode (f2clext = "-E") is accepted.
+ if [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
+ if [ $distance = "ktuples" ]; then
echo 'Not supported.' 1>&2
- echo 'Please add --globalpair, --localpair, --scarnapair,' 1>&2
+ echo 'Please add --globalpair, --localpair, --scarnapair, --dafspair' 1>&2
echo '--larapair, --slarapair, --foldalignlocalpair or --foldalignglobalpair' 1>&2
exit 1
fi
+ if [ $f2clext = "-E" ]; then
+ echo '' 1>&2
+ echo 'For RNA alignment, the --text mode is impossible.' 1>&2
+ echo '' 1>&2
+ exit 1
+ fi
fi
- if [ $mccaskill -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
- if [ $distance = "scarna" -o $distance = "lara" -o $distance = "slara" -o $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then
- strategy="X-I"
+# cycle is changed later, so the strategy name is decided here.
+# from here
+# The prefix is picked by the first matching condition; "I" is appended
+# afterwards when weighti is greater than 0.0.
+ if [ $pileuporshuffle = "p" ]; then
+ strategy="Pileup-"
+ elif [ $pileuporshuffle = "s" ]; then
+ strategy="Randomchain-"
+ elif [ $mccaskill -eq 1 -o $dafs -eq 1 -o $rnaalifold -eq 1 -o $contrafold -eq 1 ]; then
+ if [ $distance = "scarna" -o $distance = "dafs" -o $distance = "lara" -o $distance = "slara" -o $distance = "foldalignlocal" -o $distance = "foldalignglobal" ]; then
+ strategy="X-"
+# NOTE(review): the last operand below is the bare string "globalgenaf" (no `$distance =`),
+# which is always true, so this branch acts as a catch-all - confirm whether that is intended.
elif [ $distance = "global" -o $distance = "local" -o $distance = "localgenaf" -o "globalgenaf" ]; then
- strategy="Q-I"
+ strategy="Q-"
fi
elif [ $distance = "fasta" -a $sw -eq 0 ]; then
- strategy="F-I"
+ strategy="F-"
elif [ $distance = "fasta" -a $sw -eq 1 ]; then
- strategy="H-I"
+ strategy="H-"
elif [ $distance = "blast" ]; then
- strategy="B-I"
+ strategy="B-"
elif [ $distance = "global" -o $distance = "distonly" ]; then
- strategy="G-I"
+ strategy="G-"
elif [ $distance = "local" ]; then
- strategy="L-I"
+ strategy="L-"
+ elif [ $distance = "last" ]; then
+ strategy="Last-"
+ elif [ $distance = "hybrid" ]; then
+ strategy="Hybrid-"
+ elif [ $distance = "multi" ]; then
+ strategy="Multi-"
+ elif [ $distance = "lastmulti" ]; then
+ strategy="LastMulti-"
elif [ $distance = "localgenaf" ]; then
- strategy="E-I"
+ strategy="E-"
elif [ $distance = "globalgenaf" ]; then
- strategy="K-I"
+ strategy="K-"
elif [ $fft -eq 1 ]; then
strategy="FFT-"
else
strategy="NW-"
fi
+# if [ `echo "$weighti>0.0" | bc` -gt 0 ]; then
+ if [ `awk "BEGIN {print(0.0+\"$weighti\">0.0)}"` -gt 0 ]; then
+ strategy=$strategy"I"
+ fi
strategy=$strategy"NS-"
if [ $iterate -gt 0 ]; then
strategy=$strategy"i"
else
strategy=$strategy"PartTree-"$cycle
fi
+ elif [ $fragment -eq 1 ]; then
+ strategy=$strategy"fragment"
+ elif [ $fragment -eq -1 ]; then
+ strategy=$strategy"full"
+ elif [ $fragment -eq -2 ]; then
+ strategy=$strategy"long"
else
strategy=$strategy$cycle
fi
fi
if [ $outputformat = "clustal" -a $outorder = "aligned" ]; then
- outputopt=" -c $strategy -r $TMPFILE/order "
+ outputopt=" -c $strategy -r $TMPFILE/order $f2clext "
elif [ $outputformat = "clustal" -a $outorder = "input" ]; then
- outputopt=" -c $strategy "
+ outputopt=" -c $strategy $f2clext "
elif [ $outputformat = "phylip" -a $outorder = "aligned" ]; then
outputopt=" -y -r $TMPFILE/order "
elif [ $outputformat = "phylip" -a $outorder = "input" ]; then
else
outputopt="null"
fi
+# kokomade
+
+
- (
- cd $TMPFILE;
+# ( # 2017/Mar/17
+# Work inside the temporary directory. Stop if it cannot be entered, so that
+# the intermediate files created below are never written somewhere else.
+ pushd "$TMPFILE" > /dev/null || exit 1
- if [ $quiet -gt 0 ]; then
+ cat /dev/null > pre
- if [ $anysymbol -eq 1 ]; then
- mv infile orig
- "$prefix/replaceu" -i orig > infile 2>/dev/null || exit 1
+# echo "nseq = " $nseq 1>>"$progressfile"
+# echo "distance = " $distance 1>>"$progressfile"
+# echo "iterate = " $iterate 1>>"$progressfile"
+# echo "cycle = " $cycle 1>>"$progressfile"
+
+# With anysymbol set, keep the raw input as "orig" and let replaceu write the
+# working "infile"; replaceu's stderr goes to the progress file.
+ if [ $anysymbol -eq 1 ]; then
+ mv infile orig
+ "$prefix/replaceu" $seqtype -i orig > infile 2>>"$progressfile" || exit 1
+ fi
+
+# A merge table cannot be combined with added sequences (nadd > 0).
+ if [ $mergetable != "/dev/null" ]; then
+ if [ $nadd -gt "0" ]; then
+ echo "Impossible" 1>&2
+ exit 1
fi
+# if [ $seed != "x" -o $seedtable != "x" ]; then
+# echo "This version does not support the combination of merge and seed." 1>&2
+# exit 1
+# fi
+# iterate=0 # 2013/04/16
+ mergearg="-H $seedoffset"
+ fi
- if [ $seed != "x" ]; then
- mv infile infile2
- if [ $anysymbol -eq 1 ]; then
- mv orig orig2
- cat /dev/null > orig
- fi
- cat /dev/null > infile
- cat /dev/null > hat3.seed
- seedoffset=0
-# echo "seednseq="$seednseq
-# echo "seedoffset="$seedoffset
- set $seednseq > /dev/null
-# echo $#
- while [ $# -gt 1 ]
- do
- shift
-# echo "num="$#
-
- if [ $anysymbol -eq 1 ]; then
- cat seed$# >> orig
- "$prefix/replaceu" -i seed$# -o $seedoffset > clean 2>/dev/null || exit 1
- mv clean seed$#
- fi
- "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile 2>/dev/null || exit 1
- cat hat3 >> hat3.seed
-# echo "$1"
- seedoffset=`expr $seedoffset + $1`
-# echo "$1"
-# echo "seedoffset="$seedoffset
- done;
-# echo "seedoffset="$seedoffset
- if [ $anysymbol -eq 1 ]; then
- "$prefix/replaceu" -i orig2 -o $seedoffset >> infile 2>/dev/null || exit 1 # yarinaoshi
- cat orig2 >> orig
- else
- cat infile2 >> infile
- fi
- elif [ $seedtable != "x" ]; then
- cat _seedtablefile > hat3.seed
+# Direction adjustment: makedirectionlist writes _direction, then setdirection
+# rewrites infile (and orig, when anysymbol is set) accordingly.
+# NOTE(review): fragarg is "-F" in both branches, and the exit status of
+# makedirectionlist is not checked; only a setdirection failure stops the script.
+ if [ $adjustdirection -gt 0 ]; then
+ if [ $fragment -ne 0 ]; then
+ fragarg="-F" #
else
- cat /dev/null > hat3.seed
+ fragarg="-F" # 2014/02/06, do not consider other additional sequences, even in the case of --add
fi
-# cat hat3.seed
+# The two modes differ only in the -r value (5000 vs 100) and the extra -d flag.
+ if [ $adjustdirection -eq 1 ]; then
+ "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 5000 -o a > _direction 2>>"$progressfile"
+ elif [ $adjustdirection -eq 2 ]; then
+ "$prefix/makedirectionlist" $fragarg -C $numthreads -m -I $nadd -i infile -t 0.00 -r 100 -o a -d > _direction 2>>"$progressfile"
+ fi
+ "$prefix/setdirection" $mergearg -d _direction -i infile > infiled 2>>"$progressfile" || exit
+ mv infiled infile
+ if [ $anysymbol -eq 1 ]; then
+ "$prefix/setdirection" $mergearg -d _direction -i orig -r > origd 2>>"$progressfile" || exit
+ mv origd orig
+ fi
+ fi
+# Seeds (--seed or --seedtable) exclude structure lists and the enrich modes;
+# the first conflict found is reported in the progress file and ends the run.
+ if [ $seed != "x" -o $seedtable != "x" ]; then
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ echo "The combination of --seed and (--pdbidlist or --pdbfilelist) is impossible." >> "$progressfile"
+ exit 1
+ elif [ $enrich -eq 1 ]; then
+ echo "The combination of --seed and (--enrich, --enrichseq or --enrichstr) is impossible at present." >> "$progressfile"
+ exit 1
+ fi
+ fi
- if [ $mccaskill -eq 1 ]; then
- "$prefix/mccaskillwrap" -C $numthreads -d "$prefix" -i infile > hat4 2>/dev/null || exit 1
- elif [ $contrafold -eq 1 ]; then
- "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 2>/dev/null || exit 1
+ if [ $enrich -eq 1 ]; then
+# Warn that structures supplied through --pdbfilelist are not used for homolog
+# collection. printf is used (as in the neighbouring messages) because a plain
+# echo does not portably turn "\n" into a blank line.
+ if [ $ownlist != "/dev/null" ]; then
+ printf "%s\n\n" "Warning: Sequence homologs of the structures given with the --pdbfilelist option cannot be collected." 1>>"$progressfile"
fi
- if [ $distance = "fasta" ]; then
- "$prefix/dndfast7" $swopt < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "blast" ]; then
- "$prefix/dndblast" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "foldalignlocal" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -h $laof -H -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "foldalignglobal" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "slara" ]; then
- "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -T -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "lara" ]; then
- "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -B -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "scarna" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -s -d "$prefix" < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "global" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -F < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "local" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -L < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "globalgenaf" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -O $GGOP -E $GEXP -K < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "localgenaf" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -N < infile > /dev/null 2>&1 || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "distonly" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -t < infile > /dev/null 2>&1 || exit 1
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "parttree" ]; then
- "$prefix/splittbfast" -Q $splitopt $partorderopt $parttreeoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre 2>/dev/null || exit 1
- mv hat3.seed hat3
+# Run the SEEKQUENCER helper script. The PDB id list is passed (-idf pdblist)
+# only when one was given. stderr is appended to "seekerr" and the exit status
+# is kept in $seekres for the caller to inspect.
+ echo "SEEKQUENCER (http://sysimm.ifrec.osaka-u.ac.jp/seekquencer/) is" 1>>"$progressfile"
+ if [ $pdblist != "/dev/null" ]; then
+ echo "collecting homologs of the input sequences and the structures given with the --pdbidlist option." 1>>"$progressfile"
+ perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -idf pdblist -out seekout -mod mafftash-split 2>>"seekerr"
+ seekres="$?"
else
- "$prefix/disttbfast" -O $outnum $addarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $algopt $treealg $scoreoutarg < infile > pre 2>/dev/null || exit 1
- mv hat3.seed hat3
+ echo "collecting homologs of the input sequences." 1>>"$progressfile"
+ perl "$prefix/seekquencer_premafft.pl" $seektarget -run thread -trd 2 -seqd uniref90 -blim 1000 -noin -seqf infile -out seekout -mod mafftash-split 2>>"seekerr"
+ seekres="$?"
fi
- while [ $cycle -gt 1 ]
- do
- if [ $distance = "parttree" ]; then
- mv pre infile
- "$prefix/splittbfast" -Z -Q $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre 2>/dev/null || exit 1
+# Copy the helper's stderr into the progress file, then stop on a non-zero status.
+ cat seekerr 1>>"$progressfile"
+
+ if [ $seekres -ne "0" ]; then
+ echo "Error in SEEKQUENCER" 1>>"$progressfile"
+ exit 1;
+ fi
+ echo "Done." 1>>"$progressfile"
+
+# enrichseq: append the collected sequences (seekout.seq) to infile; with
+# anysymbol the raw sequences are also appended to orig.
+ if [ $enrichseq -eq 1 ]; then
+# cat seekout.seq >> infile
+ if [ $anysymbol -eq 1 ]; then
+ "$prefix/replaceu" $seqtype -i seekout.seq -o $nseq >> infile
+ cat seekout.seq >> orig
else
- "$prefix/tbfast" -O $outnum -C $numthreads $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null 2>&1 || exit 1
+# The sed expression strips the _os_<digits>_oe_ markers from replaceu's output.
+ "$prefix/replaceu" $seqtype -i seekout.seq | sed 's/_os_[0-9]*_oe_//' >> infile
fi
- cycle=`expr $cycle - 1`
- done
- if [ $iterate -gt 0 ]; then
- if [ $distance = "sixtuples" ]; then
- "$prefix/dndpre" -C $numthreads < pre > /dev/null 2>&1 || exit 1
+
+ fi
+# enrichstr: when seekout.str has more than one line, append it to the pdblist
+# file and set $pdblist to a value other than "/dev/null" ("tsukaimasu",
+# Japanese for "will use"); presumably this marks the list as in use - confirm.
+ if [ $enrichstr -eq 1 ]; then
+ nseekstr=`wc -l < seekout.str`
+ if [ $nseekstr -gt 1 ]; then
+ cat seekout.str >> pdblist
+ pdblist="tsukaimasu"
fi
- "$prefix/dvtditr" -C $numthreads -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -h "-"$aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg < pre > /dev/null 2>&1 || exit 1
fi
- else
+ fi
+ if [ $seed != "x" ]; then
+ mv infile infile2
if [ $anysymbol -eq 1 ]; then
- mv infile orig
- "$prefix/replaceu" -i orig > infile || exit 1
+ mv orig orig2
+ cat /dev/null > orig
fi
+ cat /dev/null > infile
+ cat /dev/null > hat3.seed
+ seedoffset=0
+# echo "seednseq="$seednseq
+# echo "seedoffset="$seedoffset
+ set $seednseq >> "$progressfile"
+# echo $#
+ while [ $# -gt 1 ]
+ do
+ shift
+# echo "num="$#
- if [ $seed != "x" ]; then
- mv infile infile2
if [ $anysymbol -eq 1 ]; then
- mv orig orig2
- cat /dev/null > orig
+ cat seed$# >> orig
+ "$prefix/replaceu" $seqtype -i seed$# -o $seedoffset > clean 2>>"$progressfile" || exit 1
+ mv clean seed$#
fi
- cat /dev/null > infile
- cat /dev/null > hat3.seed
- seedoffset=0
-# echo "seednseq="$seednseq
-# echo "seedoffset="$seedoffset
- set $seednseq > /dev/null
-# echo $#
- while [ $# -gt 1 ]
- do
- shift
-# echo "num="$#
-
- if [ $anysymbol -eq 1 ]; then
- cat seed$# >> orig
- "$prefix/replaceu" -i seed$# -o $seedoffset > clean || exit 1
- mv clean seed$#
- fi
- "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile || exit 1
- cat hat3 >> hat3.seed
-# echo "$1"
- seedoffset=`expr $seedoffset + $1`
-# echo "$1"
-# echo "seedoffset="$seedoffset
- done;
+ "$prefix/multi2hat3s" -t $nseq -o $seedoffset -i seed$# >> infile 2>>"$progressfile" || exit 1
+ cat hat3 >> hat3.seed
+# echo "$1"
+ seedoffset=`expr $seedoffset + $1`
+# echo "$1"
# echo "seedoffset="$seedoffset
- if [ $anysymbol -eq 1 ]; then
- "$prefix/replaceu" -i orig2 -o $seedoffset >> infile || exit 1 # yarinaoshi
- cat orig2 >> orig
- else
- cat infile2 >> infile
- fi
- elif [ $seedtable != "x" ]; then
- cat _seedtablefile > hat3.seed
+ done;
+# echo "seedoffset="$seedoffset
+ if [ $anysymbol -eq 1 ]; then
+ "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # redo: re-run replaceu on the original input
+ cat orig2 >> orig
else
- cat /dev/null > hat3.seed
+ cat infile2 >> infile
fi
-# cat hat3.seed
+ elif [ $seedtable != "x" ]; then
+ cat _seedtablefile > hat3.seed
+ elif [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ mv infile infile2
+ if [ $anysymbol -eq 1 ]; then
+ mv orig orig2
+ cat /dev/null > orig
+ fi
+ cat /dev/null > infile
+
+ echo "strdir = " 1>>"$progressfile"
+ echo $strdir 1>>"$progressfile"
+
+ echo "Calling DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/)" 1>>"$progressfile"
+ perl "$prefix/mafftash_premafft.pl" -p pdblist -o ownlist -d "$strdir" 2>>"dasherr"
+ dashres="$?"
+ cat dasherr 1>>"$progressfile"
- if [ $mccaskill -eq 1 ]; then
- "$prefix/mccaskillwrap" -C $numthreads -d "$prefix" -i infile > hat4 || exit 1
- elif [ $contrafold -eq 1 ]; then
- "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 || exit 1
+ if [ $dashres -ne "0" ]; then
+ echo "Error in DASH" 1>>"$progressfile"
+ exit 1;
fi
- if [ $distance = "fasta" ]; then
- "$prefix/dndfast7" $swopt < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "blast" ]; then
- "$prefix/dndblast" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "foldalignlocal" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -h $laof -H -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "foldalignglobal" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "slara" ]; then
- "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -T -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "lara" ]; then
- "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -B -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "scarna" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -s -d "$prefix" < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>&1 || exit 1
- elif [ $distance = "global" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -F < infile > /dev/null || exit 1
- cat hat3.seed hat3 > hatx
- mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "local" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -L < infile > /dev/null || exit 1
+ echo "Done." 1>>"$progressfile"
+
+ seedoffset=`grep -c '^[>|=]' instr | head -1 `
+
+ echo "# of structures = " 1>>"$progressfile"
+ echo $seedoffset 1>>"$progressfile"
+ mv hat3 hat3.seed
+
+ if [ $anysymbol -eq 1 ]; then
+ cat instr >> orig
+ "$prefix/replaceu" $seqtype -i instr -o 0 > clean 2>>"$progressfile" || exit 1
+ mv clean infile
+
+ "$prefix/replaceu" $seqtype -i orig2 -o $seedoffset >> infile 2>>"$progressfile" || exit 1 # redo: re-run replaceu on the original input
+ cat orig2 >> orig
+ else
+ cat instr > infile
+ cat infile2 >> infile
+ fi
+ else
+ cat /dev/null > hat3.seed
+ fi
+# cat hat3.seed
+
+
+
+
+ if [ $mccaskill -eq 1 ]; then
+ "$prefix/mccaskillwrap" -s -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ elif [ $dafs -eq 1 ]; then
+ "$prefix/mccaskillwrap" -G -C $numthreads -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ elif [ $contrafold -eq 1 ]; then
+ "$prefix/contrafoldwrap" -d "$prefix" -i infile > hat4 2>>"$progressfile" || exit 1
+ fi
+ if [ $distance = "fasta" ]; then
+ "$prefix/dndfast7" $swopt < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "blast" ]; then
+ "$prefix/dndblast" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "foldalignlocal" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $lexp -f $lgop -Q $spfactor -h $laof -H -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "foldalignglobal" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $foldalignopt $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -H -o -global -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "slara" ]; then
+ "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -T -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "lara" ]; then
+ "$prefix/pairlocalalign" -C $numthreads -p $laraparams $seqtype $model -f $lgop -Q $spfactor -B -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "scarna" ]; then
+# "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -C $numthreads $seqtype $model -f $pggop -Q $spfactor -s -d "$prefix" _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "dafs" ]; then
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $model -f $pggop -Q $spfactor -G -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "global" ]; then
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -A $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+
+ elif [ $distance = "local" ]; then
+ if [ $fragment -ne 0 ]; then
+ "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
cat hat3.seed hat3 > hatx
mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "globalgenaf" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -O $GGOP -E $GEXP -K < infile > /dev/null || exit 1
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# do not pass addarg
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -L $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+ elif [ $distance = "globalgenaf" ]; then
+ "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -O $GGOP -E $GEXP -K $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "localgenaf" ]; then
+# "$prefix/pairlocalalign" -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# cat hat3.seed hat3 > hatx
+# mv hatx hat3
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ "$prefix/tbfast" _ -u $unalignlevel $localparam -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -O $LGOP -E $LEXP -N $usenaivepairscore $focusarg _ -+ $iterate -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg $focusarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "last" ]; then
+ if [ $fragment -ne 0 ]; then
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
cat hat3.seed hat3 > hatx
mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "localgenaf" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof -O $LGOP -E $LEXP -N < infile > /dev/null || exit 1
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ "$prefix/pairlocalalign" -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -R $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+# do not pass addarg
cat hat3.seed hat3 > hatx
mv hatx hat3
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "distonly" ]; then
- "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -h $pgaof -t < infile > /dev/null || exit 1
- "$prefix/tbfast" $outnum $addarg -C $numthreads $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null || exit 1
- elif [ $distance = "parttree" ]; then
- "$prefix/splittbfast" -Q $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre || exit 1
- mv hat3.seed hat3
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+ elif [ $distance = "lastmulti" ]; then
+ "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1
+ mv hat2 hat2i
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -e $last_e -w $last_m -g $lexp -f $lgop -Q $spfactor -h $laof -r $last_subopt $last_once -d "$prefix" < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hat2 hat2n
+ mv hatx hat3
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ elif [ $distance = "multi" ]; then
+ "$prefix/dndpre" $model -M 2 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ mv hat2 hat2i
+ "$prefix/pairlocalalign" $localparam $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y $usenaivepairscore < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hat2 hat2n
+ mv hatx hat3
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ elif [ $distance = "hybrid" ]; then
+ "$prefix/pairlocalalign" $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -Q $spfactor -h $laof -Y < infile > /dev/null 2>>"$progressfile" || exit 1
+ cat hat3.seed hat3 > hatx
+ mv hatx hat3
+ "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
else
- "$prefix/disttbfast" -O $outnum $addarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $algopt $treealg $scoreoutarg < infile > pre || exit 1
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ fi
+# elif [ $distance = "distonly" ]; then
+# "$prefix/pairlocalalign" -C $numthreads $seqtype $model -g $pgexp -f $pggop -Q $spfactor -h $pgaof -t < infile > /dev/null 2>>"$progressfile" || exit 1
+# "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $rnaopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ elif [ $distance = "parttree" ]; then
+ "$prefix/splittbfast" $legacygapopt $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1
+ mv hat3.seed hat3
+ elif [ $distance = "ktuplesmulti" ]; then
+# "$prefix/dndpre" $model -M 1 $addarg -C $numthreads $seqtype $model -g $lexp -f $lgop -h $laof < infile > /dev/null 2>>"$progressfile" || exit 1
+# mv hat2 hat2i
+# "$prefix/disttbfast" -E 1 -s $unalignlevel $legacygapopt -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt -T -y $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# mv hat2 hat2n
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -d -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+# "$prefix/addsingle" -Q 100 $legacygapopt -d -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ echo "Impossible" 1>&2
+ exit 1
+ fi
+ else
+ if [ $fragment -ne 0 ]; then
+ "$prefix/addsingle" -Q 100 $legacygapopt -W $tuplesize -O $outnum $addsinglearg $addarg $add2ndhalfarg -C $numthreads $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -h $aof $param_fft $localparam $algopt $treealg $scoreoutarg < infile > /dev/null 2>>"$progressfile" || exit 1
+ else
+ "$prefix/disttbfast" -q $npickup -E $cycledisttbfast -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg -W $tuplesize $termgapopt $outnum $addarg $add2ndhalfarg -C $numthreadstb $memopt $weightopt $treeinopt $treeoutopt $distoutopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $algopt $treealg $scoreoutarg < infile > pre 2>>"$progressfile" || exit 1
mv hat3.seed hat3
fi
-
- while [ $cycle -gt 1 ]
- do
- if [ $distance = "parttree" ]; then
- mv pre infile
- "$prefix/splittbfast" -Z -Q $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft -p $partsize -s $groupsize $treealg -i infile > pre || exit 1
- else
- "$prefix/tbfast" -O $outnum -C $numthreads $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -h "-"$aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null || exit 1
- fi
- cycle=`expr $cycle - 1`
- done
- if [ $iterate -gt 0 ]; then
- if [ $distance = "sixtuples" ]; then
- "$prefix/dndpre" -C $numthreads < pre > /dev/null 2>&1 || exit 1
- fi
- "$prefix/dvtditr" -C $numthreads -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -h "-"$aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg < pre > /dev/null || exit 1
+ fi
+ while [ $cycletbfast -gt 1 ]
+ do
+ if [ $distance = "parttree" ]; then
+ mv pre infile
+ "$prefix/splittbfast" $legacygapopt -Z $algopt $splitopt $partorderopt $parttreeoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof -p $partsize -s $groupsize $treealg $outnum -i infile > pre 2>>"$progressfile" || exit 1
+ else
+ "$prefix/tbfast" -W $minimumweight -V "-"$gopdist -s $unalignlevel $legacygapopt $mergearg $termgapopt $outnum -C $numthreadstb $rnaopt $weightopt $treeoutopt $distoutopt $memopt $seqtype $model -f "-"$gop -Q $spfactor -h $aof $param_fft $localparam $algopt -J $treealg $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1
+# when fragment>0, do nothing
+# be careful with seed!!
+ fi
+ cycletbfast=`expr $cycletbfast - 1`
+ done
+ if [ $iterate -gt 0 ]; then
+ if [ $distance = "ktuples" ]; then
+ "$prefix/dndpre" $model -M 2 -C $numthreads < pre > /dev/null 2>>"$progressfile" || exit 1
fi
+ "$prefix/dvtditr" -W $minimumweight $bunkatsuopt -E $fixthreshold -s $unalignlevel $legacygapopt $mergearg $outnum -C $numthreadsit -t $randomseed $rnaoptit $memopt $scorecalcopt $localparam -z 50 $seqtype $model -f "-"$gop -Q $spfactor -h $aof -I $iterate $weightopt $treeinopt $algoptit $treealg -p $parallelizationstrategy $scoreoutarg < pre > /dev/null 2>>"$progressfile" || exit 1
fi
-
if [ $coreout -eq 1 ]; then
"$prefix/setcore" -w $corewin -i $corethr $coreext < pre > pre2
mv pre2 pre
elif [ $anysymbol -eq 1 ]; then
- "$prefix/restoreu" -a pre -i orig > restored || exit 1
+ "$prefix/restoreu" $add2ndhalfarg -a pre -i orig > restored || exit 1
mv restored pre
fi
- if [ $quiet -eq 0 ]; then
- echo '' 1>&2
- if [ $mccaskill -eq 1 ]; then
- echo "RNA base pairing probaility was calculated by the McCaskill algorithm (1)" 1>&2
- echo "implemented in Vienna RNA package (2) and MXSCARNA (3), and then" 1>&2
- echo "incorporated in the iterative alignment process (4)." 1>&2
- echo "(1) McCaskill, 1990, Biopolymers 29:1105-1119" 1>&2
- echo "(2) Hofacker et al., 2002, J. Mol. Biol. 319:3724-3732" 1>&2
- echo "(3) Tabei et al., 2008, BMC Bioinformatics 9:33" 1>&2
- echo "(4) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>&2
- echo "" 1>&2
- elif [ $contrafold -eq 1 ]; then
- echo "RNA base pairing probaility was calculated by the CONTRAfold algorithm (1)" 1>&2
- echo "and then incorporated in the iterative alignment process (4)." 1>&2
- echo "(1) Do et al., 2006, Bioinformatics 22:e90-98" 1>&2
- echo "(2) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>&2
- echo "" 1>&2
- fi
- if [ $distance = "fasta" -o $partdist = "fasta" ]; then
- echo "Pairwise alignments were computed by FASTA" 1>&2
- echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" 1>&2
- fi
- if [ $distance = "blast" ]; then
- echo "Pairwise alignments were computed by BLAST" 1>&2
- echo "(Altschul et al., 1997, NAR 25:3389-3402)" 1>&2
- fi
- if [ $distance = "scarna" ]; then
- echo "Pairwise alignments were computed by MXSCARNA" 1>&2
- echo "(Tabei et al., 2008, BMC Bioinformatics 9:33)." 1>&2
- fi
- if [ $distance = "lara" -o $distance = "slara" ]; then
- echo "Pairwise alignments were computed by LaRA" 1>&2
- echo "(Bauer et al., 2007, BMC Bioinformatics 8:271)." 1>&2
- fi
- if [ $distance = "foldalignlocal" ]; then
- echo "Pairwise alignments were computed by FOLDALIGN (local)" 1>&2
- echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>&2
- fi
- if [ $distance = "foldalignglobal" ]; then
- echo "Pairwise alignments were computed by FOLDALIGN (global)" 1>&2
- echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>&2
+
+
+
+ echo '' 1>>"$progressfile"
+ if [ $mccaskill -eq 1 ]; then # append citations for the RNA base-pairing method to the progress log
+ echo "RNA base pairing probability was calculated by the McCaskill algorithm (1)" 1>>"$progressfile"
+ echo "implemented in Vienna RNA package (2) and MXSCARNA (3), and then" 1>>"$progressfile"
+ echo "incorporated in the iterative alignment process (4)." 1>>"$progressfile"
+ echo "(1) McCaskill, 1990, Biopolymers 29:1105-1119" 1>>"$progressfile"
+ echo "(2) Hofacker et al., 2002, J. Mol. Biol. 319:3724-3732" 1>>"$progressfile"
+ echo "(3) Tabei et al., 2008, BMC Bioinformatics 9:33" 1>>"$progressfile"
+ echo "(4) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ elif [ $contrafold -eq 1 ]; then
+ echo "RNA base pairing probability was calculated by the CONTRAfold algorithm (1)" 1>>"$progressfile"
+ echo "and then incorporated in the iterative alignment process (2)." 1>>"$progressfile"
+ echo "(1) Do et al., 2006, Bioinformatics 22:e90-98" 1>>"$progressfile"
+ echo "(2) Katoh and Toh, 2008, BMC Bioinformatics 9:212" 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ fi
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ echo "Input structures are decomposed into structural domains using" 1>>"$progressfile"
+ echo "Protein Domain Parser (Alexandrov & Shindyalov 2003)." 1>>"$progressfile"
+ echo "Domain pairs are aligned using the rash function in" 1>>"$progressfile"
+ echo "the ASH structural alignment package (Standley et al. 2007)." 1>>"$progressfile"
+ fi
+ if [ $pdblist != "/dev/null" ]; then
+ echo "Pre-computed alignments stored in " 1>>"$progressfile"
+ echo "DASH (http://sysimm.ifrec.osaka-u.ac.jp/dash/) are used. " 1>>"$progressfile"
+ fi
+ if [ $distance = "fasta" -o $partdist = "fasta" ]; then
+ echo "Pairwise alignments were computed by FASTA" 1>>"$progressfile"
+ echo "(Pearson & Lipman, 1988, PNAS 85:2444-2448)" 1>>"$progressfile"
+ fi
+ if [ $distance = "blast" ]; then
+ echo "Pairwise alignments were computed by BLAST" 1>>"$progressfile"
+ echo "(Altschul et al., 1997, NAR 25:3389-3402)" 1>>"$progressfile"
+ fi
+ if [ $distance = "last" -o $distance = "lastmulti" ]; then
+ echo "Pairwise alignments were computed by LAST" 1>>"$progressfile"
+ echo "http://last.cbrc.jp/" 1>>"$progressfile"
+ echo "Kielbasa, Wan, Sato, Horton, Frith 2011 Genome Res. 21:487" 1>>"$progressfile"
+ fi
+ if [ $distance = "scarna" ]; then
+ echo "Pairwise alignments were computed by MXSCARNA" 1>>"$progressfile"
+ echo "(Tabei et al., 2008, BMC Bioinformatics 9:33)." 1>>"$progressfile"
+ fi
+ if [ $distance = "dafs" ]; then
+ echo "Pairwise alignments were computed by DAFS" 1>>"$progressfile"
+ echo "(Sato et al., 2012,,,,)." 1>>"$progressfile"
+ fi
+ if [ $distance = "lara" -o $distance = "slara" ]; then
+ echo "Pairwise alignments were computed by LaRA" 1>>"$progressfile"
+ echo "(Bauer et al., 2007, BMC Bioinformatics 8:271)." 1>>"$progressfile"
+ fi
+ if [ $distance = "foldalignlocal" ]; then
+ echo "Pairwise alignments were computed by FOLDALIGN (local)" 1>>"$progressfile"
+ echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile"
+ fi
+ if [ $distance = "foldalignglobal" ]; then
+ echo "Pairwise alignments were computed by FOLDALIGN (global)" 1>>"$progressfile"
+ echo "(Havgaard et al., 2007, PLoS Computational Biology 3:e193)." 1>>"$progressfile"
+ fi
+# printf "\n" 1>>"$progressfile"
+ echo 'Strategy:' 1>>"$progressfile"
+ printf ' '$strategy 1>>"$progressfile"
+ echo ' ('$performance')' 1>>"$progressfile"
+ echo ' '$explanation 1>>"$progressfile"
+ echo '' 1>>"$progressfile"
+ echo "If unsure which option to use, try 'mafft --auto input > output'." 1>>"$progressfile"
+ echo "For more information, see 'mafft --help', 'mafft --man' and the mafft page." 1>>"$progressfile"
+ echo "" 1>>"$progressfile"
+ echo "The default gap scoring scheme has been changed in version 7.110 (2013 Oct)." 1>>"$progressfile"
+ echo "It tends to insert more gaps into gap-rich regions than previous versions." 1>>"$progressfile"
+ echo "To disable this change, add the --leavegappyregion option." 1>>"$progressfile"
+# echo "If long gaps are expected, try 'mafft --ep 0.0 --auto input > output'." 1>>"$progressfile"
+# echo "If the possibility of long gaps can be excluded, add '--ep 0.123'." 1>>"$progressfile"
+ if [ $distance = "localgenaf" -o $distance = "globalgenaf" ]; then
+ echo "" 1>>"$progressfile"
+ if [ $oldgenafparam -eq 1 ]; then
+ echo "Obsolete parameters used for this calculation." 1>>"$progressfile"
+ echo "Also try the new parameters for E-INS-i, by not specifying --oldgenafpair." 1>>"$progressfile"
+ else
+ echo "Parameters for the E-INS-i option have been changed in version 7.243 (2015 Jun)." 1>>"$progressfile"
+ echo "To switch to the old parameters, use --oldgenafpair, instead of --genafpair." 1>>"$progressfile"
fi
- printf "\n" 1>&2
- echo 'Strategy:' 1>&2
- printf ' '$strategy 1>&2
- echo ' ('$performance')' 1>&2
- echo ' '$explanation 1>&2
- echo '' 1>&2
- echo "If unsure which option to use, try 'mafft --auto input > output'." 1>&2
-# echo "If long gaps are expected, try 'mafft --ep 0.0 --auto input > output'." 1>&2
- echo "If the possibility of long gaps can be excluded, add '--ep 0.123'." 1>&2
- echo "For more information, see 'mafft --help', 'mafft --man' and the mafft page." 1>&2
- echo '' 1>&2
fi
- )
+ echo '' 1>>"$progressfile"
+
+
+ if [ $pdblist != "/dev/null" -o $ownlist != "/dev/null" ]; then
+ cat dasherr >>"$progressfile"
+ fi
+
+# ) # 2017/Mar/17
+ popd > /dev/null;
+
if [ "$outputfile" = "" ]; then
if [ "$outputopt" = "null" ]; then
cat < $TMPFILE/pre || exit 1
else
- "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre || exit 1
+ "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre 2>>/dev/null || exit 1
fi
else
if [ "$outputopt" = "null" ]; then
cat < $TMPFILE/pre > "$outputfile" || exit 1
else
- "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre > "$outputfile" || exit 1
+ "$prefix/f2cl" -n $namelength $outputopt < $TMPFILE/pre > "$outputfile" 2>>/dev/null || exit 1
fi
fi
cp $TMPFILE/infile.tree "$infilename.tree"
fi
+ if [ -s $TMPFILE/GuideTree ]; then # --merge no toki dake
+ cp $TMPFILE/GuideTree .
+ fi
+
if [ $distout -eq 1 ]; then
cp $TMPFILE/hat2 "$infilename.hat2"
fi
+ if [ $npickup -ne 0 ]; then
+ cp $TMPFILE/notused "$infilename.notused"
+ fi
+
+ if [ -s $TMPFILE/_deletemap ]; then
+ if [ "$mapoutfile" = "/dev/null" ]; then
+ cp $TMPFILE/_deletemap "$addfile.map"
+ else
+ cp $TMPFILE/_deletemap "$mapoutfile"
+ fi
+ fi
+
exit 0;
fi
printf( "\n" ) > "/dev/tty";
printf( " MAFFT %s\n", version ) > "/dev/tty";
printf( "\n" ) > "/dev/tty";
- printf( " Copyright (c) 2011 Kazutaka Katoh\n" ) > "/dev/tty";
- printf( " NAR 30:3059-3066, NAR 33:511-518\n" ) > "/dev/tty";
- printf( " http://mafft.cbrc.jp/alignment/software/\n" ) > "/dev/tty";
+ printf( " Copyright (c) 2016 Kazutaka Katoh\n" ) > "/dev/tty";
+ printf( " MBE 30:772-780 (2013), NAR 30:3059-3066 (2002)\n" ) > "/dev/tty";
+ printf( " http://mafft.cbrc.jp/alignment/software/\n" ) > "/dev/tty";
printf( "---------------------------------------------------------------------\n" ) > "/dev/tty";
printf( "\n" ) > "/dev/tty";
while( 1 )
{
printf( "\n" ) > "/dev/tty";
- printf( "Additional arguments? (--ep #, --op #, --kappa #, etc)\n" ) > "/dev/tty";
+ printf( "Additional arguments? (--ep # --op # --kappa # etc)\n" ) > "/dev/tty";
printf( "@ " ) > "/dev/tty";
res = getline < "/dev/tty";
close( "/dev/tty" );
printf( "\n" ) > "/dev/tty";
}
system( command );
- command = sprintf( "less \"%s\"", outfile );
+ command = sprintf( "more \"%s\"", outfile );
system( command );
printf( "Press Enter to exit." ) > "/dev/tty";
res = getline < "/dev/tty";
# -w entire sequences are subjected to BLAST search
# (default: well-aligned region only)
-require 'getopts'
+#require 'getopts'
+require 'optparse'
require 'tempfile'
# mktemp
break if $_ =~ /MAFFT v/
end
pfp.close
+
if( $_ ) then
- mafftversion = sub( /^\D*/, "" ).split(" ").slice(0).strip.to_s
+ mafftversion = $_.sub( /^\D*/, "" ).split(" ").slice(0).strip.to_s
else
mafftversion = "0"
end
if( mafftversion < "5.58" ) then
- puts ""
- puts "======================================================"
- puts "Install new mafft (v. >= 5.58)"
- puts "======================================================"
- puts ""
+ STDERR.puts ""
+ STDERR.puts "======================================================"
+ STDERR.puts "Install new mafft (v. >= 5.58)"
+ STDERR.puts "======================================================"
+ STDERR.puts ""
exit
end
corewin = 50
corethr = 0.3
mafftopt = " --op 1.53 --ep 0.123 --localpair --maxiterate 1000 --reorder "
-if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then
- puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file"
- exit
-end
-if $OPT_c then
- corewin = $OPT_c.to_i
+
+#if getopts( "s", "f", "w", "l", "h", "e:", "a:", "o:", "c:", "d:" ) == nil || ARGV.length == 0 || $OPT_h then
+# puts "Usage: #{$0} [-h -l -e# -a# -o\"[options for mafft]\"] input_file"
+# exit
+#end
+params = ARGV.getopts( "sfwlhe:a:o:c:d:" )
+
+
+#if $OPT_c then
+if params["c"] != nil then
+ corewin = params["c"].to_i
end
-if $OPT_d then
- corethr = $OPT_d.to_f
+
+#if $OPT_d then
+if params["d"] != nil then
+ corethr = params["d"].to_f
end
-if $OPT_w
+
+#if $OPT_w
+if params["w"] == true then
entiresearch = 1
end
-if $OPT_f
+
+#if $OPT_f
+if params["f"] == true then
fullout = 1
end
-if $OPT_s
+
+#if $OPT_s
+if params["s"] == true then
fullout = 0
end
-if $OPT_l
+
+#if $OPT_l
+if params["l"] == true then
local = 1
end
-if $OPT_e then
- eval = $OPT_e.to_f
+
+#if $OPT_e then
+if params["e"] != nil then
+# eval = $OPT_e.to_f
+ eval = params["e"].to_f
end
-if $OPT_a then
- nadd = $OPT_a.to_i
+
+#if $OPT_a then
+if params["a"] != nil then
+ nadd = params["a"].to_i
end
-if $OPT_o then
- mafftopt += " " + $OPT_o + " "
+
+#if $OPT_o then
+if params["o"] != nil then
+ mafftopt += " " + params["o"] + " "
end
-system "cat " + ARGV.to_s + " > #{temp_if}"
+infn = ARGV[0].to_s.strip
+
+system "cat " + infn + " > #{temp_if}"
ar = mafftopt.split(" ")
nar = ar.length
for i in 0..(nar-1)
end
if local == 0 then
- command = "lynx -source 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT='" + eval.to_s + "'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put' > #{temp_rid}"
+ command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?QUERY=" + inseq[i] + "&DATABASE=swissprot&HITLIST_SIZE=" + nadd.to_s + "&FILTER=L&EXPECT='" + eval.to_s + "'&FORMAT_TYPE=TEXT&PROGRAM=blastp&SERVICE=plain&NCBI_GI=on&PAGE=Proteins&CMD=Put' > #{temp_rid}"
system command
ridp = File.open( "#{temp_rid}", 'r' )
while 1
STDERR.printf "."
sleep 10
- command = "lynx -source 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML' > #{temp_res}"
+ command = "lynx -source 'https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RID=" + rid + "&DESCRIPTIONS=500&ALIGNMENTS=" + nadd.to_s + "&ALIGNMENT_TYPE=Pairwise&OVERVIEW=no&CMD=Get&FORMAT_TYPE=XML' > #{temp_res}"
system command
resp = File.open( "#{temp_res}", 'r' )
# resp.gets
system( "rm -rf #{temp_if} #{temp_vf} #{temp_af} #{temp_bf} #{temp_pf} #{temp_qf} #{temp_res} #{temp_rid}" )
+if File.exist?( "#{temp_af}.tree" ) then
+ system( "sed 's/_addedbymaffte_/_ho_/' #{temp_af}.tree > #{ARGV[0].to_s}.tree" )
+ system( "rm #{temp_af}.tree" )
+end
--- /dev/null
+#! /usr/bin/env ruby
+require 'optparse'
+
+mafftpath = "/usr/local/bin/mafft"
+
+# Delete every path listed in filenames by running "rm -rf" on it.
+# Paths that do not exist are ignored by rm.
+def cleartempfiles( filenames )
+ filenames.each do |path|
+  system( "rm -rf #{path}" )
+ end
+end
+
+
+seed = 0
+scand = "50%"
+npick = 500
+infn = ""
+reorderoption = "--reorder"
+pickoptions = " --retree 1 "
+coreoptions = " --globalpair --maxiterate 100 "
+corelastarg = " "
+addoptions = " "
+directionoptions = " --retree 0 --pileup "
+markcore = ""
+randompickup = true
+outnum = false
+
+begin
+ params = ARGV.getopts('m:s:n:p:i:C:L:A:o:MhuD:')
+rescue => e
+ STDERR.puts e
+ STDERR.puts "See #{$0} -h"
+ exit 1
+end
+
+#p params
+
+mafftpath = params["m"] if params["m"]
+seed = params["s"].to_i if params["s"]
+scand = params["n"].to_s if params["n"]
+npick = params["p"].to_i if params["p"]
+infn = params["i"] if params["i"]
+#pickoptions += params["P"] if params["P"]
+coreoptions += params["C"] if params["C"] # tsuikagaki!
+corelastarg += params["L"] if params["L"] # tsuikagaki!
+addoptions += params["A"] if params["A"]
+directionoptions += params["D"] if params["D"] # tsuikagaki
+markcore = "*" if params["M"]
+#randompickup = false if params["S"]
+reorderoption = "" if params["o"] =~ /^i/
+outnum = true if params["u"]
+
+if params["h"] then
+ STDERR.puts "Usage: #{$0} -i inputfile [options]"
+ STDERR.puts "Options:"
+ STDERR.puts " -i string Input file."
+ STDERR.puts " -m string Mafft command. Default: mafft"
+ STDERR.puts " -s int Seed. Default:0"
+ STDERR.puts " -n int Number of candidates for core sequences. Default: upper 50% in length"
+ STDERR.puts " -p int Number of core sequences. Default: 500"
+# STDERR.puts " -P \"string\" Mafft options for the PICKUP stage."
+# STDERR.puts " Default: \"--retree 1\""
+# STDERR.puts " -S Tree-based pickup. Default: off"
+ STDERR.puts " -C \"string\" Mafft options for the CORE stage."
+ STDERR.puts " Default: \"--globalpair --maxiterate 100\""
+ STDERR.puts " -A \"string\" Mafft options for the ADD stage."
+ STDERR.puts " Default: \"\""
+ STDERR.puts " -D \"string\" Mafft options for inferring the direction of nucleotide sequences."
+ STDERR.puts " Default: \"\""
+ STDERR.puts " -o r or i r: Reorder the sequences based on similarity. Default"
+ STDERR.puts " i: Same as input."
+ exit 1
+end
+
+if infn == "" then
+ STDERR.puts "Give input file with -i."
+ exit 1
+end
+
+
+
+pid = $$.to_s
+tmpdir = ENV["TMPDIR"]
+tmpdir = "/tmp" if tmpdir == nil
+tempfiles = []
+tempfiles.push( temp_pf = tmpdir + "/_pf" + pid )
+tempfiles.push( temp_nf = tmpdir + "/_nf" + pid )
+tempfiles.push( temp_cf = tmpdir + "/_cf" + pid )
+tempfiles.push( temp_of = tmpdir + "/_of" + pid )
+
+Signal.trap(:INT){cleartempfiles( tempfiles ); exit 1}
+at_exit{ cleartempfiles( tempfiles )}
+
+# Run "<mafftpath> --version", capturing stdout and stderr in temp_of, and
+# read the first line of that output.
+system "#{mafftpath} --version > #{temp_of} 2>&1"
+
+fp = File.open( temp_of, "r" )
+ line = fp.gets
+fp.close
+
+
+# The first whitespace-separated token, with one "v" removed, is read as a
+# float. A missing or non-numeric token becomes 0.0, so an empty output is
+# reported as a too-old version instead of raising NoMethodError on nil.
+versionnum = line.to_s.split(' ')[0].to_s.sub(/v/,"").to_f
+
+if versionnum < 7.210 then
+ STDERR.puts "\n"
+ STDERR.puts "Please use mafft version >= 7.210\n"
+ STDERR.puts "\n"
+ # Exit non-zero, like every other error path in this script.
+ exit 1
+end
+
+srand( seed )
+
+# Read FASTA records from fp.
+#
+# fp   - an open, readable IO-like object (anything responding to gets).
+# name - array; one header per record is appended, without the leading ">"
+#        and without the line terminator.
+# seq  - array; one sequence per record is appended, built by concatenating
+#        the stripped body lines of that record.
+#
+# Returns the number of records read. Text before the first header is
+# discarded. name and seq always grow by the same number of elements, so
+# nothing is appended to seq when the input has no header line.
+def readfasta( fp, name, seq )
+ nseq = 0
+ tmpseq = ""
+ while ( line = fp.gets )
+  if line =~ /^>/ then
+   # chomp (not chop): a header on a final line without a trailing
+   # newline must keep its last character.
+   name.push( line.sub(/>/,"").chomp )
+   seq.push( tmpseq ) if nseq > 0
+   nseq += 1
+   tmpseq = ""
+  else
+   tmpseq += line.strip
+  end
+ end
+ # Flush the last record, if there was one.
+ seq.push( tmpseq ) if nseq > 0
+ return nseq
+end
+
+
+
+begin
+ infp = File.open( infn, "r" )
+rescue => e
+ STDERR.puts e
+ exit 1
+end
+infp.close
+
+if directionoptions =~ /--adjustdirection/ then
+ system( mafftpath + "#{directionoptions} #{infn} > #{temp_of}" )
+else
+ system( "cp #{infn} #{temp_of}" )
+end
+
+tname = []
+tseq = []
+infp = File.open( temp_of, "r" )
+tin = readfasta( infp, tname, tseq )
+infp.close
+lenhash = {}
+
+if outnum then
+ for i in 0..(tin-1)
+ tname[i] = "_numo_s_#{i}_numo_e_" + tname[i]
+ end
+end
+
+npick = 0 if npick == 1
+npick = tin if npick > tin
+
+
+if scand =~ /%$/ then
+ ncand = (tin * scand.to_f * 0.01 ).to_i
+else
+ ncand = scand.to_i
+end
+
+if ncand < 0 || ncand > tin then
+ STDERR.puts "Error. -n #{scand}?"
+ exit 1
+end
+
+ncand = npick if ncand < npick
+ncand = tin if ncand > tin
+
+STDERR.puts "ncand = #{ncand}, npick = #{npick}"
+
+
+sai = []
+for i in 0..(tin-1)
+ lenhash[i] = tseq[i].gsub(/-/,"").length
+end
+
+i = 0
+sorted = lenhash.sort_by{|key, value| [-value, i+=1]}
+#for i in 0..(ncand-1)
+# sai[sorted[i][0]] = 1
+#end
+#for i in ncand..(tin-1)
+# sai[sorted[i][0]] = 0
+#end
+
+# Mark core-sequence candidates in sai[], visiting the sequences in the
+# order given by `sorted` (cand is the sequence's index into tname/tseq).
+#   ncandres - number of sequences marked as candidates (sai == 1) so far
+#   ntsukau  - number of sequences whose name starts with "_focus_"
+ncandres = 0
+ntsukau = 0
+for i in 0..(tin-1)
+ cand = sorted[i][0]
+ if tname[cand] =~ /^_focus_/ then
+  # "_focus_" sequences are counted separately and are not candidates here.
+  sai[cand] = 0
+  ntsukau += 1
+ elsif ncandres < ncand then
+  # Still below the candidate quota: accept the sequence unless its name
+  # starts with "_tsukawanai_" (presumably Japanese for "do not use").
+  unless tname[cand] =~ /^_tsukawanai_/ then
+   sai[cand] = 1
+   ncandres += 1
+  else
+   sai[cand] = 0
+  end
+ else
+  # Candidate quota already filled.
+  sai[cand] = 0
+ end
+end
+
+if ncandres+ntsukau < npick
+ STDERR.puts "ncandres = #{ncandres}"
+ STDERR.puts "ncand = #{ncand}"
+ STDERR.puts "ntsukau = #{ntsukau}"
+ STDERR.puts "npick = #{npick}"
+ STDERR.puts "Too many _tsukawanai_ sequences."
+ exit 1
+end
+
+if ntsukau > npick
+ STDERR.puts "ntsukau = #{ntsukau}"
+ STDERR.puts "npick = #{npick}"
+ STDERR.puts "Too many _focus_ sequences."
+ exit 1
+end
+
+#p sai
+#for i in 0..(tin-1)
+# puts sai[i].to_s + " " + tname[i]
+#end
+
+npickrand = npick - ntsukau
+
+if randompickup
+ pick = []
+ for i in 0..(npickrand-1)
+ pick[i] = 1
+ end
+ for i in npickrand..(ncandres-1)
+ pick[i] = 0
+ end
+ pick2 = pick.sort_by{rand}
+ pick = pick2
+# p pick
+# p sai
+
+ ipick = 0
+ for i in 0..(tin-1)
+ if sai[i] == 1 then
+ if pick[ipick] == 0 then
+ sai[i] = 0
+ end
+ ipick += 1
+ end
+ end
+# p sai
+
+ for i in 0..(tin-1)
+ if tname[i] =~ /^_focus_/ then
+ sai[i] = 1
+ end
+ end
+# p sai
+
+ pfp = File.open( temp_pf, 'w' )
+ nfp = File.open( temp_nf, 'w' )
+
+ i = 0
+ while i < tin
+ if sai[i] == 1 then
+ pfp.puts ">" + i.to_s + " " + ">" + markcore + tname[i]
+ pfp.puts tseq[i]
+ else
+ nfp.puts ">" + i.to_s + " " + ">" + tname[i]
+ nfp.puts tseq[i]
+ end
+ i += 1
+ end
+
+ nfp.close
+ pfp.close
+
+else # yamerukamo
+ STDERR.puts "Not supported in this version"
+ exit 1
+end
+
+if npick > 1 then
+ if npick < tin then
+ system( mafftpath + " #{coreoptions} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # add de sort
+ else
+ system( mafftpath + " #{coreoptions} #{reorderoption} #{temp_pf} #{corelastarg} > #{temp_cf}" ) # ima sort
+ end
+ res = ( File::stat(temp_cf).size == 0 )
+else
+ system( "cat /dev/null > #{temp_cf}" )
+ res = false
+end
+
+if res == true then
+ STDERR.puts "\n\nError in the core alignment stage.\n\n"
+ exit 1
+end
+
+
+if npick < tin
+ system( mafftpath + " #{addoptions} #{reorderoption} --add #{temp_nf} #{temp_cf} > #{temp_of}" )
+ res = ( File::stat(temp_of).size == 0 )
+else
+ system( "cp #{temp_cf} #{temp_of}" )
+ res = false
+end
+
+if res == true then
+ STDERR.puts "\n\nError in the add stage.\n\n"
+ exit 1
+end
+
+resname = []
+resseq = []
+resfp = File.open( temp_of, "r" )
+nres = readfasta( resfp, resname, resseq )
+resfp.close
+
+# Print the final alignment to stdout as FASTA.
+# Names that match /^[0-9]* >/ carry a numeric prefix, which is removed
+# before the name is printed.
+if reorderoption =~ /--reorder/ then
+ # Reordered output: print the records in the order they were read from
+ # temp_of.
+ for i in 0..(nres-1)
+  puts ">" + resname[i].sub(/^[0-9]* >/,"")
+  puts resseq[i]
+ end
+else
+ # Input-order output: records with the numeric prefix are stored under
+ # that number; records without it are collected separately.
+ seqhash = {}
+ namehash = {}
+ seqlast = []
+ namelast = []
+ nlast = 0
+ for i in 0..(nres-1)
+  if resname[i] =~ /^[0-9]* >/
+   key = resname[i].split(' ')[0]
+   seqhash[key] = resseq[i]
+   namehash[key] = resname[i]
+  else
+   seqlast.push( resseq[i] )
+   namelast.push( resname[i] )
+   nlast += 1
+  end
+ end
+ # Records without a numeric prefix are printed first, unchanged.
+ for i in 0..(nlast-1)
+  puts ">" + namelast[i]
+  puts seqlast[i]
+ end
+ # Then the prefixed records, looked up by the keys "0", "1", ... in
+ # ascending order; this assumes those keys are contiguous from 0.
+ for i in 0..(nres-nlast-1)
+  key = i.to_s
+  puts ">" + namehash[key].sub(/^[0-9]* >/,"")
+  puts seqhash[key]
+ end
+end
+
+
> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-M-----------------------------------------------------------
------------------------------------NGTE--GDNFYVPF-----------
----------------------------------------------SNKTGLARSPYEYPQ
-Y-YLAEPW------------K--------------------------------YSALAAY
-MFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTS-MN-
-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNTHAIMGV
-AFTWIMALAC-AAPPLVG-WSRYIP-------EGM----QCSCGPDYYTLNPNFNNESYV
-VYMFVVHFLVPFVIIFFCYGRLLCTV----KE----------------------------
-------------------------------------------------------------
-----------------------------------------------AAAAQQE-------
-------------------------------SASTQKAEKEVTRMVVLMVIGFLVCWVPYA
-SVAFYIFT-HQG-S--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLC-C-
---GKNP-------------------LGDDE--SGASTSKT-E--VSSVS-TSPV------
-------------------------------------------------------------
--------------------SP-A---
+MN----------------------------------------------------------
+GTE--GDNFYVP------------FSNKTGLARSPYEYPQ---------Y-YLAEPWK--
+-------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLF
+GF-TVTMYTS-MN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMG
+NF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNP
+NFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE--------------------
+------------------------------------------------------------
+------------------------------------------------------AAAAQQ
+---------------------------------ESASTQKAEKEVTRMVVLMVIGFLVCW
+VPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLC
+C-------GKNPLGD-DE---SGASTSKT------------------------EVSSVS-
+------------------------------------------------------------
+-TSPVSP-A---
> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
-M-----------------------------------------------------------
------------------------------------NGTE--GPNFYVPF-----------
----------------------------------------------SNITGVVRSPFEQPQ
-Y-YLAEPW------------Q--------------------------------FSMLAAY
-MFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTS-LH-
-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGENHAIMGV
-AFTWVMALAC-AAPPLVG-WSRYIP-------EGM----QCSCGIDYYTLKPEVNNESFV
-IYMFVVHFTIPMIVIFFCYGQLVFTV----KE----------------------------
-------------------------------------------------------------
-----------------------------------------------AAAQQQE-------
-------------------------------SATTQKAEKEVTRMVIIMVIFFLICWLPYA
-SVAMYIFT-HQG-S--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLC-C-
---GKNP-------------------LGDDE--ASATASKT-E--------TSQV------
-------------------------------------------------------------
--------------------AP-A---
+MN----------------------------------------------------------
+GTE--GPNFYVP------------FSNITGVVRSPFEQPQ---------Y-YLAEPWQ--
+-------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFG
+GF-TTTLYTS-LH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMS
+NF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKP
+EVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE--------------------
+------------------------------------------------------------
+------------------------------------------------------AAAQQQ
+---------------------------------ESATTQKAEKEVTRMVIIMVIFFLICW
+LPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLC
+C-------GKNPLGD-DE---ASATASKT------------------------E------
+------------------------------------------------------------
+-TSQVAP-A---
> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
-M-----------------------------------------------------------
------------------------------------NGTE--GINFYVPM-----------
----------------------------------------------SNKTGVVRSPFEYPQ
-Y-YLAEPW------------K--------------------------------YRLVCCY
-IFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTA-WN-
-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSATHAMMGI
-AFTWVMAFSC-AAPPLFG-WSRYMP-------EGM----QCSCGPDYYTHNPDYHNESYV
-LYMFVIHFIIPVVVIFFSYGRLICKV----RE----------------------------
-------------------------------------------------------------
-----------------------------------------------AAAQQQE-------
-------------------------------SATTQKAEKEVTRMVILMVLGFMLAWTPYA
-VVAFWIFT-NKG-A--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTIC-C-
---GKNP-------------------FGDEDVSSTVSQSKT-E--VSSVS-SSQV------
-------------------------------------------------------------
--------------------SP-A---
+MN----------------------------------------------------------
+GTE--GINFYVP------------MSNKTGVVRSPFEYPQ---------Y-YLAEPWK--
+-------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACF
+GF-TVTFYTA-WN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMG
+NF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNP
+DYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE--------------------
+------------------------------------------------------------
+------------------------------------------------------AAAQQQ
+---------------------------------ESATTQKAEKEVTRMVILMVLGFMLAW
+TPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTIC
+C-------GKNPFGD-EDV-SSTVSQSKT------------------------EVSSVS-
+------------------------------------------------------------
+-SSQVSP-A---
> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-M-----------------------------------------------------------
------------------------------------NGTE--GKNFYVPM-----------
----------------------------------------------SNRTGLVRSPFEYPQ
-Y-YLAEPW------------Q--------------------------------FKILALY
-LFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTA-IN-
-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSSHAFAGI
-AFTWVMALAC-AAPPLFG-WSRYIP-------EGM----QCSCGPDYYTLNPDYNNESYV
-IYMFVCHFILPVAVIFFTYGRLVCTV----KA----------------------------
-------------------------------------------------------------
-----------------------------------------------AAAQQQD-------
-------------------------------SASTQKAEREVTKMVILMVFGFLIAWTPYA
-TVAAWIFF-NKG-A--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIF-C-
---GKNP-------------------LGDDE-SSTVSTSKT-E--VSS------V------
-------------------------------------------------------------
--------------------SP-A---
+MN----------------------------------------------------------
+GTE--GKNFYVP------------MSNRTGLVRSPFEYPQ---------Y-YLAEPWQ--
+-------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCF
+GF-TVTFYTA-IN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMG
+SF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNP
+DYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA--------------------
+------------------------------------------------------------
+------------------------------------------------------AAAQQQ
+---------------------------------DSASTQKAEREVTKMVILMVFGFLIAW
+TPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIF
+C-------GKNPLGD-DE--SSTVSTSKT------------------------EVSS---
+------------------------------------------------------------
+----VSP-A---
> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-M-----------------------------------------------------------
------------------------------------NGTE--GNNFYVPL-----------
----------------------------------------------SNRTGLVRSPFEYPQ
-Y-YLAEPW------------Q--------------------------------FKLLAVY
-MFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTA-IN-
-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSTHASAGI
-AFTWVMAMAC-AAPPLVG-WSRYIP-------EGI----QCSCGPDYYTLNPEYNNESYV
-LYMFICHFILPVTIIFFTYGRLVCTV----KA----------------------------
-------------------------------------------------------------
-----------------------------------------------AAAQQQD-------
-------------------------------SASTQKAEREVTKMVILMVLGFLVAWTPYA
-TVAAWIFF-NKG-A--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLF-C-
---GKNP-------------------LGDEE-SSTVSTSKT-E--VSS------V------
-------------------------------------------------------------
--------------------SP-A---
+MN----------------------------------------------------------
+GTE--GNNFYVP------------LSNRTGLVRSPFEYPQ---------Y-YLAEPWQ--
+-------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCF
+GF-TVTFYTA-IN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMG
+SF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNP
+EYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA--------------------
+------------------------------------------------------------
+------------------------------------------------------AAAQQQ
+---------------------------------DSASTQKAEREVTKMVILMVLGFLVAW
+TPYATVAAWIFF-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLF
+C-------GKNPLGD-EE--SSTVSTSKT------------------------EVSS---
+------------------------------------------------------------
+----VSP-A---
> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
-M-----------------------------------------------------------
------------------------------------KQVPEFHEDFYIPIPL---------
--------------------------------------------DINNLS--AYSPFLVPQ
-D-HLGNQG------------I--------------------------------FMAMSVF
-MFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSF-FN-
-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTPHAIAGC
-ILPWISALAA-SLPPLFG-WSRYIP-------EGL----QCSCGPDWYTTNNKYNNESYV
-MFLFCFCFAVPFGTIVFCYGQLLITL----KL----------------------------
-------------------------------------------------------------
-----------------------------------------------AAKAQAD-------
-------------------------------SASTQKAEREVTKMVVVMVLGFLVCWAPYA
-SFSLWIVS-HRG-E--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMV-C-
---GKN--------------------IEEDE--ASTSSQVT-Q--VSS------V------
-------------------------------------------------------------
--------------------APEK---
+MK----------------------------------------------------------
+QVPEFHEDFYIPI--------PLDINNLS--AYSPFLVPQ---------D-HLGNQGI--
+-------FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIF
+GS-PLSFYSF-FN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLG
+NF-TFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNN
+KYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL--------------------
+------------------------------------------------------------
+------------------------------------------------------AAKAQA
+---------------------------------DSASTQKAEREVTKMVVVMVLGFLVCW
+APYASFSLWIVS-HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMV
+C-------GKN-IEE-DE---ASTSSQVT------------------------QVSS---
+------------------------------------------------------------
+----VAPEK---
> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-M-----------------------------------------------------------
------------------------------------RKMS--EEEFYL---F---------
----------------------------------------------KNISSV--GPWDGPQ
-Y-HIAPVW------------A--------------------------------FYLQAAF
-MGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVAS-CN-
-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSKHALTVV
-LATWTIGIGV-SIPPFFG-WSRFIP-------EGL----QCSCGPDWYTVGTKYRSESYT
-WFLFIFCFIVPLSLICFSYTQLLRAL----KA----------------------------
-------------------------------------------------------------
-----------------------------------------------VAAQQQE-------
-------------------------------SATTQKAEREVSRMVVVMVGSFCVCYVPYA
-AFAMYMVN-NRN-H--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMV-C-
---GKA--------------------MTDES--DTCSSQKT-E--VSTVS-STQV------
-------------------------------------------------------------
--------------------GP-N---
+MR----------------------------------------------------------
+KMS--EEEFYL-------------FKNISSV--GPWDGPQ---------Y-HIAPVWA--
+-------FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIF
+SV-FPVFVAS-CN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFG
+NF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGT
+KYRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA--------------------
+------------------------------------------------------------
+------------------------------------------------------VAAQQQ
+---------------------------------ESATTQKAEREVSRMVVVMVGSFCVCY
+VPYAAFAMYMVN-NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMV
+C-------GKA-MTD-ES---DTCSSQKT------------------------EVSTVS-
+------------------------------------------------------------
+-STQVGP-N---
> 8=opsin, greensensitive human (fragment) S07060
------------------------------------------------------------
------------------------------------------------------------
+-----------------------------------------------------DLAETVI
+AS-TISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFG
+NV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSS
+YPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA--------------------
------------------------------------------------------------
+------------------------------------------------------VAKQQK
+---------------------------------ESESTQKAEKEVTRMVVVMVLAFC---
------------------------------------------------------------
----------------------------------------DLAETVIA-STISIVNQ-VS-
-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIVGI
-AFSWIWAAVW-TAPPIFG-WSRYWP-------HGL----KTSCGPDVFSGSSYPGVQSYM
-IVLMVTCCITPLSIIVLCYLQVWLAI----RA----------------------------
-------------------------------------------------------------
-----------------------------------------------VAKQQKE-------
-------------------------------SESTQKAEKEVTRMVVVMVLAFC-------
------------------------------------------------------------
------------------------------------------------------------
-------------------------------------------------------------
---------------------------
+------------
> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-M--------------AQQWSLQRLAGRHPQDSYE--------------------------
------------------------------------DSTQ--SSIFTYTN-----------
----------------------------------------------SNST---RGPFEGPN
-Y-HIAPRW------------V--------------------------------YHLTSVW
-MIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQ-VY-
-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAKLAIVGI
-AFSWIWAAVW-TAPPIFG-WSRYWP-------HGL----KTSCGPDVFSGSSYPGVQSYM
-IVLMVTCCITPLSIIVLCYLQVWLAI----RA----------------------------
-------------------------------------------------------------
-----------------------------------------------VAKQQKE-------
-------------------------------SESTQKAEKEVTRMVVVMVLAFCFCWGPYA
-FFACFAAA-NPG-Y--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF---
---GKK--------------------VDDGS--ELSSASKT-E--VSSV---SSV------
-------------------------------------------------------------
--------------------SP-A---
+MAQQWSLQRLA---------------------------------------GRHPQDSYED
+STQ--SSIFTYT------------NSNST---RGPFEGPN---------Y-HIAPRWV--
+-------YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI
+AS-TISVVNQ-VY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFG
+NV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSS
+YPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA--------------------
+------------------------------------------------------------
+------------------------------------------------------VAKQQK
+---------------------------------ESESTQKAEKEVTRMVVVMVLAFCFCW
+GPYAFFACFAAA-NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF-
+--------GKK-VDD-GS---ELSSASKT------------------------EVSSV--
+------------------------------------------------------------
+--SSVSP-A---
> 10== Z68193 1 human Red Opsin <>[]
-M--------------AQQWSLQRLAGRHPQDSYE--------------------------
------------------------------------DSTQ--SSIFTYTN-----------
----------------------------------------------SNST---RGPFEGPN
-Y-HIAPRW------------V--------------------------------YHLTSVW
-MIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQ-VS-
-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAKLAIVGI
-AFSWIWSAVW-TAPPIFG-WSRYWP-------HGL----KTSCGPDVFSGSSYPGVQSYM
-IVLMVTCCIIPLAIIMLCYLQVWLAI----RA----------------------------
-------------------------------------------------------------
-----------------------------------------------VAKQQKE-------
-------------------------------SESTQKAEKEVTRMVVVMIFAYCVCWGPYT
-FFACFAAA-NPG-Y--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF---
---GKK--------------------VDDGS--ELSSASKT-E--VSSV---SSV------
-------------------------------------------------------------
--------------------SP-A---
+MAQQWSLQRLA---------------------------------------GRHPQDSYED
+STQ--SSIFTYT------------NSNST---RGPFEGPN---------Y-HIAPRWV--
+-------YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI
+AS-TISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFG
+NV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSS
+YPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA--------------------
+------------------------------------------------------------
+------------------------------------------------------VAKQQK
+---------------------------------ESESTQKAEKEVTRMVVVMIFAYCVCW
+GPYTFFACFAAA-NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF-
+--------GKK-VDD-GS---ELSSASKT------------------------EVSSV--
+------------------------------------------------------------
+--SSVSP-A---
> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
-M--------------TEAWNVAVFAARRSRDD-D--------------------------
------------------------------------DTTR--GSVFTYTN-----------
----------------------------------------------TNNT---RGPFEGPN
-Y-HIAPRW------------V--------------------------------YNLVSFF
-MIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQ-IF-
-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSKLAIIGI
-VFSWVWAWGW-SAPPIFG-WSRYWP-------HGL----KTSCGPDVFSGSVELGCQSFM
-LTLMITCCFLPLFIIIVCYLQVWMAI----RA----------------------------
-------------------------------------------------------------
-----------------------------------------------VAAQQKE-------
-------------------------------SESTQKAEREVSRMVVVMIVAFCICWGPYA
-SFVSFAAA-NPG-Y--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF---
---GKK--------------------VDDGS--EASTTSRT-E--VSSVS-NSSV------
-------------------------------------------------------------
--------------------AP-A---
+MTEAWNVAVFA---------------------------------------ARRSRDD-DD
+TTR--GSVFTYT------------NTNNT---RGPFEGPN---------Y-HIAPRWV--
+-------YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLV
+AS-TISVFNQ-IF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFG
+NI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSV
+ELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA--------------------
+------------------------------------------------------------
+------------------------------------------------------VAAQQK
+---------------------------------ESESTQKAEREVSRMVVVMIVAFCICW
+GPYASFVSFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF-
+--------GKK-VDD-GS---EASTTSRT------------------------EVSSVS-
+------------------------------------------------------------
+-NSSVAP-A---
> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
-M--------------AA-WEAAFAARRRHEE--E--------------------------
------------------------------------DTTR--DSVFTYTN-----------
----------------------------------------------SNNT---RGPFEGPN
-Y-HIAPRW------------V--------------------------------YNLTSVW
-MIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQ-IS-
-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGKLAVAGI
-LFSWLWSCAW-TAPPIFG-WSRYWP-------HGL----KTSCGPDVFSGSSDPGVQSYM
-VVLMVTCCFFPLAIIILCYLQVWLAI----RA----------------------------
-------------------------------------------------------------
-----------------------------------------------VAAQQKE-------
-------------------------------SESTQKAEKEVSRMVVVMIVAYCFCWGPYT
-FFACFAAA-NPG-Y--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF---
---GKK--------------------VDDGS--EVST-SRT-E--VSSVS-NSSV------
-------------------------------------------------------------
--------------------SP-A---
+MA-AWEAAFAA---------------------------------------RRRHEE--ED
+TTR--DSVFTYT------------NSNNT---RGPFEGPN---------Y-HIAPRWV--
+-------YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVI
+AS-TISVINQ-IS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFG
+NI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSS
+DPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA--------------------
+------------------------------------------------------------
+------------------------------------------------------VAAQQK
+---------------------------------ESESTQKAEKEVSRMVVVMIVAYCFCW
+GPYTFFACFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF-
+--------GKK-VDD-GS---EVST-SRT------------------------EVSSVS-
+------------------------------------------------------------
+-NSSVSP-A---
> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-M--------------S-----------------S--------------------------
------------------------------------NSSQ--AP-----------------
----------------------------------------------PNGT---PGPFDGPQ
-WPYQAPQS------------T--------------------------------YVGVAVL
-MGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNN-IN-
-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRRHAVSGC
-AFTWGWALLW-SAPPLLG-WSSYVP-------EGL----RTSCGPNWYTGGSNN--NSYI
-LSLFVTCFVLPLSLILFSYTNLLLTL----RA----------------------------
-------------------------------------------------------------
-----------------------------------------------AAAQQKE-------
-------------------------------ADTTQRAEREVTRMVIVMVMAFLLCWLPYS
-TFALVVAT-HKG-I--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCG
-YQPQR--------------------TGKAS--PGTPGPHA-D--VTAAGLRNKV------
-------------------------------------------------------------
--------------------MP-AHPV
+MS--------------------------------------------------------SN
+SSQ--AP------------------PNGT---PGPFDGPQ---------WPYQAPQST--
+-------YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLC
+GS-SVSLSNN-IN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLG
+DF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGS
+NN--NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA--------------------
+------------------------------------------------------------
+------------------------------------------------------AAAQQK
+---------------------------------EADTTQRAEREVTRMVIVMVMAFLLCW
+LPYSTFALVVAT-HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLC
+C----GYQPQR-TGK-AS---PGTPGPHA------------------------DVTAAG-
+------------------------------------------------------------
+LRNKVMP-AHPV
> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-M--------------------------------ES-G-----------------------
------------------------------------NVSS-----------SLFGNVSTAL
-RPEA------------------RLSA---ETRLLGWNVPPEELR----------------
---HIPEHW------------LT---------YPEPPESM--------------NYLLGTL
-YIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNS-FH-
-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIAMI
-IFIYMYATPW-VVACYTETWGRFVP-------EGY----LTSCTFDYLT--DNFDTRLFV
-ACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------LRDQAKK--MNVES
-L------------------------RS-NVDKNKETAEIRIAKAAITICFLFFCSWTPYG
-VMSLIGAF-GDK-T--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQ-------
---KRCP----WLALNE---KAPE--SSAVA--STSTTQEP-Q------------------
--------QT---------------------------------------------------
--------------------TA-A---
+MESGNV-----------------------------------------------SSSLFGN
+VSTALRPEARLSA---ETRLLGW---------NVPPEELR-----------HIPEHWLTY
+PEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDF-MMMV
+KT-PIFIYNS-FH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPME
+GK--MTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--D
+NFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------LRDQAK
+K--------------------MNVESL---RSNVDKNKETAEIRIAKAAITICFLFFCSW
+TPYGVMSLIGAF-GDKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCP
+W----LALNEKAPES--SAVASTSTTQEP------------------------QQT----
+------------------------------------------------------------
+-----TA-A---
> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-M--------------------------------EY-H-----------------------
------------------------------------NVSS-----------VL-GNVSSVL
-RPDA------------------RLSA---ESRLLGWNVPPDELR----------------
---HIPEHW------------LI---------YPEPPESM--------------NYLLGTL
-YIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNS-FH-
-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIAMI
-IFIYLYATPW-VVACYTESWGRFVP-------EGY----LTSCTFDYLT--DNFDTRLFV
-ACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------LRDQAKK--MNVDS
-L------------------------RS-NVDKSKEAAEIRIAKAAITICFLFFASWTPYG
-VMSLIGAF-GDK-T--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQ-------
---KRCP----WLAISE---KAPE--SRAAI--STSTTQEQ-Q------------------
--------QT---------------------------------------------------
--------------------TA-A---
+MEYHNV-----------------------------------------------SSVL-GN
+VSSVLRPDARLSA---ESRLLGW---------NVPPDELR-----------HIPEHWLIY
+PEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDF-MMMI
+KT-PIFIYNS-FH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPME
+GK--MTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--D
+NFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------LRDQAK
+K--------------------MNVDSL---RSNVDKSKEAAEIRIAKAAITICFLFFASW
+TPYGVMSLIGAF-GDKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCP
+W----LAISEKAPES--RAAISTSTTQEQ------------------------QQT----
+------------------------------------------------------------
+-----TA-A---
> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-M--------------------------------EPLC-----------------------
------------------------------------NASE-----------PP-------L
-RPEA------------------R-SSGNGDLQFLGWNVPPDQIQ----------------
---YIPEHW------------LT---------QLEPPASM--------------HYMLGVF
-YIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NS-FH-
-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMN
-IIIWLYCTPW-VVLPLTQFWDRFVP-------EGY----LTSCSFDYLS--DNFDTRLFV
-GTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------LREQAKK--MNVES
-L------------------------RS-NVDKSKETAEIRIAKAAITICFLFFVSWTPYG
-VMSLIGAF-GDK-S--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQ-------
---KRCP----WLGVNE---KSGE--ISSAQ--ST-TTQEQ-Q------------------
--------QT---------------------------------------------------
--------------------TA-A---
+ME-----------------------------------------------------PLCNA
+SEPPLRPEAR-SSGNGDLQFLGW---------NVPPDQIQ-----------YIPEHWLTQ
+LEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDL-IMCL
+KA-PIF--NS-FH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMN
+RN--MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--D
+NFDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------LREQAK
+K--------------------MNVESL---RSNVDKSKETAEIRIAKAAITICFLFFVSW
+TPYGVMSLIGAF-GDKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCP
+W----LGVNEKSGEI--SSAQST-TTQEQ------------------------QQT----
+------------------------------------------------------------
+-----TA-A---
> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-M--------------------------------DALC-----------------------
------------------------------------NASE-----------PP-------L
-RPEA------------------RMSSGSDELQFLGWNVPPDQIQ----------------
---YIPEHW------------LT---------QLEPPASM--------------HYMLGVF
-YIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNS-FH-
-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMN
-IIIWLYCTPW-VVLPLTQFWDRFVP-------EGY----LTSCSFDYLS--DNFDTRLFV
-GTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------LREQAKK--MNVES
-L------------------------RS-NVDKSKETAEIRIAKAAITICFLFFVSWTPYG
-VMSLIGAF-GDK-S--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQ-------
---KRCP----WLGVNE---KSGE--ASSAQ--ST-TTQEQTQ------------------
--------QT---------------------------------------------------
--------------------SA-A---
+MD-----------------------------------------------------ALCNA
+SEPPLRPEARMSSGSDELQFLGW---------NVPPDQIQ-----------YIPEHWLTQ
+LEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDL-IMCL
+KA-PIFIYNS-FH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMN
+RN--MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--D
+NFDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------LREQAK
+K--------------------MNVESL---RSNVDKSKETAEIRIAKAAITICFLFFVSW
+TPYGVMSLIGAF-GDKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCP
+W----LGVNEKSGEA--SSAQST-TTQEQ-----------------------TQQT----
+------------------------------------------------------------
+-----SA-A---
> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
-M--------------------------------------------TNATGPQMAYYGAAS
-MDFGYPE----------------------------GVSI-----------VD--------
-------------------------------------FVRPEIKP----------------
---YVHQHW------------YN---------YPPVNPMW--------------HYLLGVI
-YLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNC-FSG
-GVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTGKAVVFA
-LISWVIAIGC-ALPPFFG-WGNYIL-------EGI----LDSCSYDYLT--QDFNTFSYN
-IFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA----------------------------
-------------------------------------------------------------
-----------------------------------------------MRAQAKK--MNVST
-L------------------------RS-N-EADAQRAEIRIAKTALVNVSLWFICWTPYA
-LISLKGVM-GDT-S--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAIT-------
---QHLP----WFCVHE---TETKS-NDDSQ--SNSTVAQ---------------------
-------------------------------------------------------------
--------------------DK-A---
+MT----------------------------------------------------------
+--NATGPQMAYYGAASMD-F-GY-PEGVSIVDFVRPEIKP-----------YVHQHWYNY
+PPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTT
+NV-PFFTYNC-FSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFN
+GP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--Q
+DFNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA--------------------
+------------------------------------------------------------
+------------------------------------------------------MRAQAK
+K--------------------MNVSTL---RS-NEADAQRAEIRIAKTALVNVSLWFICW
+TPYALISLKGVM-GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLP
+W----FCVHETETKS-NDDSQSNSTVAQ--------------------------------
+------------------------------------------------------------
+-----DK-A---
> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
-M--------------------------------------------ANVTGPQMAFYGSGA
-ATFGYPE----------------------------GMTV-----------AD--------
-------------------------------------FVPDRVKH----------------
---MVLDHW------------YN---------YPPVNPMW--------------HYLLGVV
-YLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNC-FSG
-GRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQGKATFMC
-GLAWVISVGW-SLPPFFG-WGSYTL-------EGI----LDSCSYDYFT--RDMNTITYN
-ICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA----------------------------
-------------------------------------------------------------
-----------------------------------------------MRAQAKK--MNVTN
-L------------------------RS-N-EAETQRAEIRIAKTALVNVSLWFICWTPYA
-AITIQGLL-GNA-E--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAIT-------
---QHLP----WFCVHE---KDPND-VEENQ--SSNTQTQ---------------------
-------------------------------------------------------------
--------------------EK-S---
+MA----------------------------------------------------------
+--NVTGPQMAFYGSGAAT-F-GY-PEGMTVADFVPDRVKH-----------MVLDHWYNY
+PPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTT
+NF-PPFCYNC-FSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFN
+GP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--R
+DMNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA--------------------
+------------------------------------------------------------
+------------------------------------------------------MRAQAK
+K--------------------MNVTNL---RS-NEAETQRAEIRIAKTALVNVSLWFICW
+TPYAAITIQGLL-GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLP
+W----FCVHEKDPND-VEENQSSNTQTQ--------------------------------
+------------------------------------------------------------
+-----EK-S---
> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
-M--------------------------------E------SFAVAAAQLGPHFAPLS---
------------------------------------NGSV-----------VD--------
-------------------------------------KVTPDMAH----------------
---LISPYW------------NQ---------FPAMDPIW--------------AKILTAY
-MIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINL-YF-
-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALGKM
-----------------------YVP-------EGN----LTSCGIDYLE--RDWNPRSYL
-IFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------MREQAKK--MNVKS
-L------------------------RS-S-EDAEKSAEGKLAKVALVTITLWFMAWTPYL
-VINCMGLF--KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-------
---EKCP----CCVFGK---VDDGK-SSDAQ--SQATASEA-E------------------
-------------------------------------------------------------
--------------------SK-A---
+ME------------------------------------------------------SFAV
+AAAQLGPHFAPLS-------------NGSVVDKVTPDMAH-----------LISPYWNQF
+PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
+NT-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMA
+GR-PMTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--R
+DWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------MREQAK
+K--------------------MNVKSL---RS-SEDAEKSAEGKLAKVALVTITLWFMAW
+TPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP
+C----CVFGKVDDGK-SSDAQSQATASEA------------------------E------
+------------------------------------------------------------
+-----SK-A---
> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
-M--------------------------------E------SFAVAAAQLGPHFAPLS---
------------------------------------NGSV-----------VD--------
-------------------------------------KVTPDMAH----------------
---LISPYW------------NQ---------FPAMDPIW--------------AKILTAY
-MIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINL-YF-
-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALGKI
-AYIWFMSSIW-CLAPAFG-WSRYVP-------EGN----LTSCGIDYLE--RDWNPRSYL
-IFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------MREQAKK--MNVKS
-L------------------------RS-S-EDAEKSAEGKLAKVALVTITLWFMAWTPYL
-VINCMGLF--KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-------
---EKCP----CCVFGK---VDDGK-SSDAQ--SQATASEA-E------------------
-------------------------------------------------------------
--------------------SK-A---
+ME------------------------------------------------------SFAV
+AAAQLGPHFAPLS-------------NGSVVDKVTPDMAH-----------LISPYWNQF
+PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
+NT-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMA
+GR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--R
+DWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------MREQAK
+K--------------------MNVKSL---RS-SEDAEKSAEGKLAKVALVTITLWFMAW
+TPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP
+C----CVFGKVDDGK-SSDAQSQATASEA------------------------E------
+------------------------------------------------------------
+-----SK-A---
> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
-M--------------------------------D------SFAAVATQLGPQFAAPS---
------------------------------------NGSV-----------VD--------
-------------------------------------KVTPDMAH----------------
---LISPYW------------DQ---------FPAMDPIW--------------AKILTAY
-MIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINL-YF-
-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIPLALGKI
-AYIWFMSTIWCCLAPVFG-WSRYVP-------EGN----LTSCGIDYLE--RDWNPRSYL
-IFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------MREQAKK--MNVKS
-L------------------------RS-S-EDADKSAEGKLAKVALVTISLWFMAWTPYL
-VINCMGLF--KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-------
---EKCP----CCVFGK---VDDGK-SSEAQ--SQATTSEA-E------------------
-------------------------------------------------------------
--------------------SK-A---
+MD------------------------------------------------------SFAA
+VATQLGPQFAAPS-------------NGSVVDKVTPDMAH-----------LISPYWDQF
+PAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
+NT-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMA
+GR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--R
+DWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------MREQAK
+K--------------------MNVKSL---RS-SEDADKSAEGKLAKVALVTISLWFMAW
+TPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCP
+C----CVFGKVDDGK-SSEAQSQATTSEA------------------------E------
+------------------------------------------------------------
+-----SK-A---
> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-M--------------------------------ER-SHLPETPFDLAHSGPRFQAQSSG-
------------------------------------NGSV-----------LD--------
-------------------------------------NVLPDMAH----------------
---LVNPYW------------SR---------FAPMDPMM--------------SKILGLF
-TLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINF-YY-
-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIMKI
-LFIWMMAVFW-TVMPLIG-WSAYVP-------EGN----LTACSIDYMT--RMWNPRSYL
-ITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------MREQAKK--MNVKS
-L------------------------RS-S-EDCDKSAEGKLAKVALTTISLWFMAWTPYL
-VICYFGLF--KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLK-------
---EKCP----MCVFGN---TDEPKPDAPAS--DTETTSEA-D------------------
-------------------------------------------------------------
--------------------SK-A---
+MERSHL-------------------------------------------------PETPF
+DLAHSGPRFQAQSSG-----------NGSVLDNVLPDMAH-----------LVNPYWSRF
+APMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS
+QS-PVMIINF-YY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGIN
+GT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--R
+MWNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------MREQAK
+K--------------------MNVKSL---RS-SEDCDKSAEGKLAKVALTTISLWFMAW
+TPYLVICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCP
+M----CVFGNTDEPKPDAPASDTETTSEA------------------------D------
+------------------------------------------------------------
+-----SK-A---
> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-M--------------------------------ER-SLLPEPPLAMALLGPRFEAQTGG-
------------------------------------NRSV-----------LD--------
-------------------------------------NVLPDMAP----------------
---LVNPHW------------SR---------FAPMDPTM--------------SKILGLF
-TLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINF-YY-
-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIKTSIMKI
-AFIWMMAVFW-TIMPLIG-WSSYVP-------EGN----LTACSIDYMT--RQWNPRSYL
-ITYSLFVYYTPLFMICYSYWFIIATVAAHEKA----------------------------
-------------------------------------------------------------
-----------------------------------------------MRDQAKK--MNVKS
-L------------------------RS-S-EDCDKSAENKLAKVALTTISLWFMAWTPYL
-IICYFGLF--KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLK-------
---EKCP----MCVCGT---TDEPKPDAPPS--DTETTSEA-E------------------
-------------------------------------------------------------
--------------------SK-D---
+MERSLL-------------------------------------------------PEPPL
+AMALLGPRFEAQTGG-----------NRSVLDNVLPDMAP-----------LVNPHWSRF
+APMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS
+QS-PVMIINF-YY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGIN
+GT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--R
+QWNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA--------------------
+------------------------------------------------------------
+------------------------------------------------------MRDQAK
+K--------------------MNVKSL---RS-SEDCDKSAENKLAKVALTTISLWFMAW
+TPYLIICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCP
+M----CVCGTTDEPKPDAPPSDTETTSEA------------------------E------
+------------------------------------------------------------
+-----SK-D---
> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-M--------------------------------------------IAVSGPSYEAFSYG-
----GQAR--------------------------FNNQTV-----------VD--------
-------------------------------------KVPPDMLH----------------
---LIDANW------------YQ---------YPPLNPMW--------------HGILGFV
-IGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINC-YY-
-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSINGALIRI
-IAIWLFSLGW-TIAPMFG-WNRYVP-------EGN----MTACGTDYFN--RGLLSASYL
-VCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN----------------------------
-------------------------------------------------------------
-----------------------------------------------MREQAKK--MNVAS
-L------------------------RS-S-ENQNTSAECKLAKVALMTISLWFMAWTPYL
-VINFSGIF--NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALF-------
---AKFP----SLAC-A---AEPSS-DAVST--TSGTTTVT-D------------------
-------------------------------------------------------------
----------------NEK-SN-A---
+MI----------------------------------------------------------
+--AVSGPSYEAFSYGGQARF-----NNQTVVDKVPPDMLH-----------LIDANWYQY
+PPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFC
+MS-PPMVINC-YY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLS
+GK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--R
+GLLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN--------------------
+------------------------------------------------------------
+------------------------------------------------------MREQAK
+K--------------------MNVASL---RS-SENQNTSAECKLAKVALMTISLWFMAW
+TPYLVINFSGIF-NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFP
+S----LAC-AAEPSS-DAVSTTSGTTTVT------------------------DNEK---
+------------------------------------------------------------
+-----SN-A---
> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-M----------------------------------------------ANQLSYSSLGWP-
----YQP-----------------------------NASV-----------VD--------
-------------------------------------TMPKEMLY----------------
---MIHEHW------------YA---------FPPMNPLW--------------YSILGVA
-MIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNC-FA-
-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHKKATLLL
-LFVWIWSGGW-TILPFFG-WSRYVP-------EGN----LTSCTVDYLT--KDWSSASYV
-VIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ----------------------------
-------------------------------------------------------------
-----------------------------------------------LREQAKK--MNVAS
-L------------------------RANA-DQQKQSAECRLAKVAMMTVGLWFMAWTPYL
-IISWAGVF--SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALY-------
---QRFP----SLACGS---GESGS-DVKSE--ASATTTME-E------------------
-------------------------------------------------------------
----------------KPKIPE-A---
-> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
M-----------------------------------------------------------
----VESTTLV------------------NQTWWY-NPTV-----------D---------
-------------------------------------------------------------
----IHPHW------------AK---------FDPIPDAV--------------YYSVGIF
-IGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISA-FM-
-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMI
-IFVWMWSIVW-SVGPVFN-WGAYVP-------EGI----LTSCSFDYLS--TDPSTRSFI
-LCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE----------------------------
-------------------------------------------------------------
-----------------------------------------------MAAMAKR--LNAKE
-L------------------------RK---AQAGASAEMKLAKISMVIITQFMLSWSPYA
-IIALLAQF-GPA-E--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQ-------
---TTFPWLLTCCQFDE---KECED-ANDAE--EEVVASER---GGESRD-AAQMKEMMAM
-MQKMQAQQAAYQPPPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAP
-PQVEAPQGAPPQGVDNQA-YQ-A---
+---ANQLSYSSLGWPYQP--------NASVVDTMPKEMLY-----------MIHEHWYAF
+PPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAF
+MM-PTMTSNC-FA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMA
+AA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--K
+DWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ--------------------
+------------------------------------------------------------
+------------------------------------------------------LREQAK
+K--------------------MNVASL---RANADQQKQSAECRLAKVAMMTVGLWFMAW
+TPYLIISWAGVF-SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFP
+S----LACGSGESGS-DVKSEASATTTME------------------------EKPK---
+------------------------------------------------------------
+----IPE-A---
+> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
+MVESTTL--------------------------------------------VNQ------
+---------------------TW-WYNPTVD---------------------IHPHWAKF
+DPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAI
+NGFPLKTISA-FM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMA
+ASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--T
+DPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE--------------------
+------------------------------------------------------------
+------------------------------------------------------MAAMAK
+R--------------------LNAKEL---RK--AQAGASAEMKLAKISMVIITQFMLSW
+SPYAIIALLAQF-GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFP
+WLLTCCQFDEKECED-ANDAEEEVVASER--GGESRDAAQMKEMMAMMQKMQAQQAAYQP
+PPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQG
+VDNQAYQ-A---
> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
-M-----------------------------------------------------------
----GRDLR-D------------------NETWWY-NPSI-----------V---------
-------------------------------------------------------------
----VHPHW------------RE---------FDQVPDAV--------------YYSLGIF
-IGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISC-FL-
-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMI
-IFVWLWSVLW-AIGPIFG-WGAYTL-------EGV----LCNCSFDYIS--RDSTTRSNI
-LCMFILGFFGPILIIFFCYFNIVMSVSNHEKE----------------------------
-------------------------------------------------------------
-----------------------------------------------MAAMAKR--LNAKE
-L------------------------RK---AQAGANAEMRLAKISIVIVSQFLLSWSPYA
-VVALLAQF-GPL-E--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAIS-------
---QTFPWVLTCCQFDD---KETED-DKDAE--TEIPAGES-SDAAPSAD-AAQMKEMMAM
-MQKMQQQQAAY----PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAP
-PQ-GAPPAAPPQGVDNQA-YQ-A---
+MGRDLR---------------------------------------------DNE------
+---------------------TW-WYNPSIV---------------------VHPHWREF
+DQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLV
+NGFPLMTISC-FL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMA
+ASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--R
+DSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE--------------------
+------------------------------------------------------------
+------------------------------------------------------MAAMAK
+R--------------------LNAKEL---RK--AQAGANAEMRLAKISIVIVSQFLLSW
+SPYAVVALLAQF-GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFP
+WVLTCCQFDDKETED-DKDAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAY--
+--PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQG
+VDNQAYQ-A---
> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
-MMDVNSSGRPDLY-----------------------------------------------
------------------GHLRSFL--LPEVGR---GLPD--------------------L
-SPDGGADPVAGSWAPHLLS---EVTA----------------------------------
---SPAPTW-----------------------DAPPDNAS--GCGEQINYGRVEKVVIGSI
-LTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIG-
-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMI
-LSVWLLSASI-TLPPLFG-WAQNVN--DDK---------VCLISQDF----------GYT
-IYSTAVAFYIPMSVMLFMYYQIYKAAR---------------------------------
------------------KSAAKHKFPGFPRVEPDSVI----------------------A
-LNGIVK----------------------LQKEVEECAN--------LSRLLKH-------
--------------------------ER-K-NISIFKREQKAATTLGIIVGAFTVCWLPFF
-LLSTARPFICGT-SCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYR-SLL---
-----------QCQYRN-INRKLSA-AGMHE--ALKLAERP-E--------RPEF-----V
-LQNADY--------CRKKGHD---------------------------------------
-----------------------S---
+MMDVNSSGRPDLY----GHLRSFLLP--EVGRGLPDLSPDGGADPVAGSWAPHLLS---E
+VTASPAPTW-----------------------DAPPDNAS--GCGEQINY----------
+---GRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVA
+VM-PFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLT
+YPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLIS------
+--QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------
+PGF---------------------------------PRVEPDSVI---------------
+-------ALNGIVK----------------------LQKEVEECAN--------LSRLLK
+H-----------------------------ER-KNISIFKREQKAATTLGIIVGAFTVCW
+LPFFLLSTARPFICGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ
+C--QYRNINRK---------LSAAGMHEA---------------LKLAERP--ERPEFVL
+Q-----------------------------------------------------------
+NADYCRKKGHDS
> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
-M-----------------------------------------------------------
-------------------------------------------------------------
---------------PHLLSGFLEVTA----------------------------------
---SPAPTW-----------------------DAPPDNVS--GCGEQINYGRVEKVVIGSI
-LTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIG-
-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMI
-LSVWLLSASI-TLPPLFG-WAQNVN--DDK---------VCLISQDF----------GYT
-IYSTAVAFYIPMSVMLFMYYQIYKAAR---------------------------------
------------------KSAAKHKFPGFPRVQPESVI----------------------S
-LNGVVK----------------------LQKEVEECAN--------LSRLLKH-------
--------------------------ER-K-NISIFKREQKAATTLGIIVGAFTVCWLPFF
-LLSTARPFICGT-SCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSR-SLL---
-----------QCQYRN-INRKLSA-AGMHE--ALKLAERP-E--------RSEF-----V
-LQNSDH--------CGKKGHD---------------------------------------
-----------------------T---
+M--------------------------------------------------PHLLSGFLE
+VTASPAPTW-----------------------DAPPDNVS--GCGEQINY----------
+---GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA
+VM-PFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLT
+YPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLIS------
+--QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------
+PGF---------------------------------PRVQPESVI---------------
+-------SLNGVVK----------------------LQKEVEECAN--------LSRLLK
+H-----------------------------ER-KNISIFKREQKAATTLGIIVGAFTVCW
+LPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQ
+C--QYRNINRK---------LSAAGMHEA---------------LKLAERP--ERSEFVL
+Q-----------------------------------------------------------
+NSDHCGKKGHDT
> 31=p A47425 serotonin receptor 5HT-7 - rat
-M-----------------------------------------------------------
-------------------------------------------------------------
---------------PHLLSGFLEVTA----------------------------------
---SPAPTW-----------------------DAPPDNVS--GCGEQINYGRVEKVVIGSI
-LTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIG-
-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMI
-LSVWLLSASI-TLPPLFG-WAQNVN--DDK---------VCLISQDF----------GYT
-IYSTAVAFYIPMSVMLFMYYQIYKAAR---------------------------------
------------------KSAAKHKFPGFPRVQPESVI----------------------S
-LNGVVK----------------------LQKEVEECAN--------LSRLLKH-------
--------------------------ER-K-NISIFKREQKAATTLGIIVGAFTVCWLPFF
-LLSTARPFICGT-SCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYR-SLL---
-----------QCQYRN-INRKLSA-AGMHE--ALKLAERP-E--------RSEF-----V
-LQNSDH--------CGKKGHD---------------------------------------
-----------------------T---
+M--------------------------------------------------PHLLSGFLE
+VTASPAPTW-----------------------DAPPDNVS--GCGEQINY----------
+---GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA
+VM-PFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLT
+YPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLIS------
+--QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-----------------
+PGF---------------------------------PRVQPESVI---------------
+-------SLNGVVK----------------------LQKEVEECAN--------LSRLLK
+H-----------------------------ER-KNISIFKREQKAATTLGIIVGAFTVCW
+LPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ
+C--QYRNINRK---------LSAAGMHEA---------------LKLAERP--ERSEFVL
+Q-----------------------------------------------------------
+NSDHCGKKGHDT
> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-M-DVLSPGQ---------------------------------------------------
------------------GNNTTSPPAPFETGG---NTTG--------------------I
--------------------------S----------------------------------
---DVTVSY---------------------------------------------QVITSLL
-LGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQ-VL-
-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALI
-SLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---------ACTISKDH----------GYT
-IYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------------------------
------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNWRLGVESKAGGAL-C
-ANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP-----ASFERK-------
-----NERNA----------------EA-K-RKMALARERKTVKTLGIIMGTFILCWLPFF
-IVALVLPF-CES-SC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KII---
-----------KCKFCR--------------------------------------------
-------------------------------------------------------------
-----------------------Q---
+M-DVLSPGQ--------GNNTTSPPAPFETGGNTTGI-----------------------
+--SDVTVSY---------------------------------------------------
+-------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL
+VL-PMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPID
+YVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS------
+--KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK--------------------
+-------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNWRLGVES
+KAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP-----ASFER
+K-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGTFILCW
+LPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK
+C--KFCR-----------------------------------------------------
+------------------------------------------------------------
+-----------Q
> 33=p A35181 serotonin receptor class 1A - rat
-M-DVFSFGQ---------------------------------------------------
------------------GNNTTASQEPFGTGG---NVTS--------------------I
--------------------------S----------------------------------
---DVTFSY---------------------------------------------QVITSLL
-LGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQ-VL-
-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALI
-SLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---------ACTISKDH----------GYT
-IYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------------------------
------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTP-C
-TNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP-----ACLERK-------
-----NERNA----------------EA-K-RKMALARERKTVKTLGIIMGTFILCWLPFF
-IVALVLPF-CES-SC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KII---
-----------KCKFCR--------------------------------------------
-------------------------------------------------------------
-----------------------R---
+M-DVFSFGQ--------GNNTTASQEPFGTGGNVTSI-----------------------
+--SDVTFSY---------------------------------------------------
+-------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL
+VL-PMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPID
+YVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTIS------
+--KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK--------------------
+-------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDWRRCAEN
+RAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP-----ACLER
+K-----------NERNA-------------EA-KRKMALARERKTVKTLGIIMGTFILCW
+LPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK
+C--KFCR-----------------------------------------------------
+------------------------------------------------------------
+-----------R
> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-M-ANFTFGDLALD-----------------------------------------------
--------------VARMGGLASTPSGLRSTGL---TTPG--------------------L
-SPTG------------------LVTS----------------------------------
---DFNDSYGLTGQFINGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVYSHEHLVLTSVI
-LGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSE-IS-
-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMI
-MVVWIVALFI-SIPPLFG-WRDPNN--DPD------KTGTCIISQDK----------GYT
-IFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVV
-SDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK----------------NRAKKLPEN
-ANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-IAMLERQ-CNNGKK
-ISSNDTPYS----------------RT-R-EKLELKRERKAARTLAIITGAFLICWLPFF
-IIALIGPF-VDP-E--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KIL---
-----------FGKYRR--------------------------------------------
-------------------GH----------------------------------------
-----------------------R---
+M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L
+VTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVY----------
+---SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL
+VM-PLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-ID
+YIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS------
+--QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVAS
+PKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK----------------N
+RAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-IAMLER
+Q-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGAFLICW
+LPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF
+G--KYRRG----------------------------------------------------
+------------------------------------------------------------
+----------HR
> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-M-ANFTFGDLALD-----------------------------------------------
--------------VARMGGLASTPSGLRSTGL---TTPG--------------------L
-SPTG------------------LVTS----------------------------------
---DFNDSYGLTGQFINGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVYSHEHLVLTSVI
-LGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSE-IS-
-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMI
-MVVWIVALFI-SIPPLFG-WRDPNN--DPD------KTGTCIISQDK----------GYT
-IFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVV
-SDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK----------------NRAKKLPEN
-ANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-IAMLERQ-CNNGKK
-ISSNDTPYS----------------RT-R-EKLELKRERKAARTLAIITGAFLICWLPFF
-IIALIGPF-VDP-E--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KIL---
-----------FGKYRR--------------------------------------------
-------------------GH----------------------------------------
-----------------------R---
+M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L
+VTSDFNDSYGLTGQFINGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVY----------
+---SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL
+VM-PLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-ID
+YIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCIIS------
+--QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVAS
+PKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK----------------N
+RAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS-IAMLER
+Q-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIITGAFLICW
+LPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF
+G--KYRRG----------------------------------------------------
+------------------------------------------------------------
+----------HR
> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
-M-EGAE-GQEELDWEA--------------------------------------------
----------LYLRLP-----------LQNCSW---NSTG--------------------W
-EPNW------------------NVTV----------------------------------
---VPNTTW----------------WQASAPFDTPAALVR--------------AAAKAVV
-LGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYE-VV-
-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGMMI
-ACVWTVSFFV-CIAQLLG-WK------DPDWNQRVSEDLRCVVSQDV----------GYQ
-IFATASSFYVPVLIILILYWRIYQTARKRIRR----------------------------
------------------RRGATARGGVGPPPVP---------------------------
-AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNVSSNNTSPEKQSCANGLE
-A---DPPTTGYGAVAAAYYPSLVRRKP-K-EAADSKRERKAAKTLAIITGAFVACWLPFF
-VLAILVPT-CDC-E---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQ-RLL---
-----------CGRRVRRR------------------------------------------
------------------RAP----------------------------------------
-----------------------Q---
+M-EGAE-GQEELD----WEALYLRLPLQNCSWNSTGWEPNW------------------N
+VTVVPNTTW----------------WQASAPFDTPAALVR--------------------
+-------AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACL
+VM-PLGAVYE-VV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-ID
+YIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RCVVS------
+--QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR--------------------
+-------------------------RRGATARGGVGPPPVP-------------------
+--------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNVSSNNTSPEK
+QSCANGLEADPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKTLAIITGAFVACW
+LPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLC
+G--RRVRRRR--------------------------------------------------
+------------------------------------------------------------
+---------APQ
> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-------------------------------------------------------------
----------MNGTE--GDNFYVP----FSNKTGLARSPYEYPQY-YLAEPWK--------
--YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVT
-MYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGN
-THAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESY
-VVYMFVVHFLVPFVIIFFCYGRLLCTV---------------------------------
-------------------------------------------------------------
-------------------------------KEAAAAQQ----------ESASTQKA----
--------------------------EKEVTRMVVLMVIGFLVCWVPYASVAFYIFTHQGS
-D---FGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCCGKN------PLGDDE
---SGASTSKTEVSSVS-TSPVSPA------------------------------------
----------------------------------------------------
+-------------------MNGTE------------------------GDNF--------
+YVP----F-SNKTGLARSPY----------------EYPQY-------YLAEPWK-----
+----YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-F
+TVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FR
+FGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNN
+ESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE------------------------
+---------------------------------------------------AAAAQQ---
+------------------------------------------------------------
+--------------ESASTQK------AEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-
+HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC---------GKN
+PLGD-DE--SGASTSKTEVSSVS-TSPV--------------------------------
+--------------------------------------------SPA-------------
+------
> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
-------------------------------------------------------------
----------MNGTE--GPNFYVP----FSNITGVVRSPFEQPQY-YLAEPWQ--------
--FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTT
-LYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGE
-NHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESF
-VIYMFVVHFTIPMIVIFFCYGQLVFTV---------------------------------
-------------------------------------------------------------
-------------------------------KEAAAQQQ----------ESATTQKA----
--------------------------EKEVTRMVIIMVIFFLICWLPYASVAMYIFTHQGS
-N---FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCCGKN------PLGDDE
---ASATASKTE------TSQVAPA------------------------------------
----------------------------------------------------
+-------------------MNGTE------------------------GPNF--------
+YVP----F-SNITGVVRSPF----------------EQPQY-------YLAEPWQ-----
+----FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-F
+TTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FR
+FGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNN
+ESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE------------------------
+---------------------------------------------------AAAQQQ---
+------------------------------------------------------------
+--------------ESATTQK------AEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-
+HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC---------GKN
+PLGD-DE--ASATASKTE------TSQV--------------------------------
+--------------------------------------------APA-------------
+------
> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
-------------------------------------------------------------
----------MNGTE--GINFYVP----MSNKTGVVRSPFEYPQY-YLAEPWK--------
--YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVT
-FYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSA
-THAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESY
-VLYMFVIHFIIPVVVIFFSYGRLICKV---------------------------------
-------------------------------------------------------------
-------------------------------REAAAQQQ----------ESATTQKA----
--------------------------EKEVTRMVILMVLGFMLAWTPYAVVAFWIFTNKGA
-D---FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICCGKN------PFGDED
-VSSTVSQSKTEVSSVS-SSQVSPA------------------------------------
----------------------------------------------------
+-------------------MNGTE------------------------GINF--------
+YVP----M-SNKTGVVRSPF----------------EYPQY-------YLAEPWK-----
+----YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-F
+TVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FR
+FSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHN
+ESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE------------------------
+---------------------------------------------------AAAQQQ---
+------------------------------------------------------------
+--------------ESATTQK------AEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-
+NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC---------GKN
+PFGD-EDVSSTVSQSKTEVSSVS-SSQV--------------------------------
+--------------------------------------------SPA-------------
+------
> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-------------------------------------------------------------
----------MNGTE--GKNFYVP----MSNRTGLVRSPFEYPQY-YLAEPWQ--------
--FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVT
-FYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSS
-SHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESY
-VIYMFVCHFILPVAVIFFTYGRLVCTV---------------------------------
-------------------------------------------------------------
-------------------------------KAAAAQQQ----------DSASTQKA----
--------------------------EREVTKMVILMVFGFLIAWTPYATVAAWIFFNKGA
-D---FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFCGKN------PLGDDE
--SSTVSTSKTEVSS------VSPA------------------------------------
----------------------------------------------------
+-------------------MNGTE------------------------GKNF--------
+YVP----M-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ-----
+----FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-F
+TVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK
+FSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNN
+ESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA------------------------
+---------------------------------------------------AAAQQQ---
+------------------------------------------------------------
+--------------DSASTQK------AEREVTKMVILMVFGFLIAWTPYATVAAWIFF-
+NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC---------GKN
+PLGD-DE-SSTVSTSKTEVSS------V--------------------------------
+--------------------------------------------SPA-------------
+------
> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-------------------------------------------------------------
----------MNGTE--GNNFYVP----LSNRTGLVRSPFEYPQY-YLAEPWQ--------
--FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVT
-FYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSS
-THASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESY
-VLYMFICHFILPVTIIFFTYGRLVCTV---------------------------------
-------------------------------------------------------------
-------------------------------KAAAAQQQ----------DSASTQKA----
--------------------------EREVTKMVILMVLGFLVAWTPYATVAAWIFFNKGA
-A---FSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFCGKN------PLGDEE
--SSTVSTSKTEVSS------VSPA------------------------------------
----------------------------------------------------
+-------------------MNGTE------------------------GNNF--------
+YVP----L-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ-----
+----FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-F
+TVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FK
+FSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNN
+ESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA------------------------
+---------------------------------------------------AAAQQQ---
+------------------------------------------------------------
+--------------DSASTQK------AEREVTKMVILMVLGFLVAWTPYATVAAWIFF-
+NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC---------GKN
+PLGD-EE-SSTVSTSKTEVSS------V--------------------------------
+--------------------------------------------SPA-------------
+------
> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
-------------------------------------------------------------
----------MKQVPEFHEDFYIPIPLDINNLS--AYSPFLVPQD-HLGNQGI--------
--FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLS
-FYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKT
-PHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESY
-VMFLFCFCFAVPFGTIVFCYGQLLITL---------------------------------
-------------------------------------------------------------
-------------------------------KLAAKAQA----------DSASTQKA----
--------------------------EREVTKMVVVMVLGFLVCWAPYASFSLWIVSHRGE
-E---FDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVCGKN-------IEEDE
---ASTSSQVTQVSS------VAPEK-----------------------------------
----------------------------------------------------
+-------------------MKQVPEF----------------------HEDF--------
+YIPIPLDI-NNLS--AYSPF----------------LVPQD-------HLGNQGI-----
+----FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-S
+PLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FT
+FKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNN
+ESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL------------------------
+---------------------------------------------------AAKAQA---
+------------------------------------------------------------
+--------------DSASTQK------AEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-
+HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC---------GKN
+-IEE-DE--ASTSSQVTQVSS------V--------------------------------
+--------------------------------------------APEK------------
+------
> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-------------------------------------------------------------
----------MRKMS--EEEFYL-----FKNISSV--GPWDGPQY-HIAPVWA--------
--FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPV
-FVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSS
-KHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESY
-TWFLFIFCFIVPLSLICFSYTQLLRAL---------------------------------
-------------------------------------------------------------
-------------------------------KAVAAQQQ----------ESATTQKA----
--------------------------EREVSRMVVVMVGSFCVCYVPYAAFAMYMVNNRNH
-G---LDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVCGKA-------MTDES
---DTCSSQKTEVSTVS-STQVGPN------------------------------------
----------------------------------------------------
+-------------------MRKMS------------------------EEEF--------
+YL-----F-KNIS--SVGPW----------------DGPQY-------HIAPVWA-----
+----FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-V
+FPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FR
+FSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRS
+ESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA------------------------
+---------------------------------------------------VAAQQQ---
+------------------------------------------------------------
+--------------ESATTQK------AEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-
+NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC---------GKA
+-MTD-ES--DTCSSQKTEVSTVS-STQV--------------------------------
+--------------------------------------------GPN-------------
+------
> 8=opsin, greensensitive human (fragment) S07060
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------DLAETVIA-STIS
-IVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDA
-KLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSY
-MIVLMVTCCITPLSIIVLCYLQVWLAI---------------------------------
+--------------------------------------------------DLAETVIA-S
+TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR
+FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV
+QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA------------------------
+---------------------------------------------------VAKQQK---
------------------------------------------------------------
-------------------------------RAVAKQQK----------ESESTQKA----
--------------------------EKEVTRMVVVMVLAFC-------------------
+--------------ESESTQK------AEKEVTRMVVVMVLAFC----------------
------------------------------------------------------------
------------------------------------------------------------
----------------------------------------------------
+------------------------------------------------------------
+------
> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-----------------------------------------------------------MA
-QQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNS-TRGPFEGPNY-HIAPRWV--------
--YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STIS
-VVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDA
-KLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSY
-MIVLMVTCCITPLSIIVLCYLQVWLAI---------------------------------
-------------------------------------------------------------
-------------------------------RAVAKQQK----------ESESTQKA----
--------------------------EKEVTRMVVVMVLAFCFCWGPYAFFACFAAANPGY
-P---FHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF-----------GKKVDD
-GSELSSASKTEVSSV---SSVSPA------------------------------------
----------------------------------------------------
+-------------------MAQQWSL----------QRLAGRHPQDSYEDST--------
+QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV-----
+----YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S
+TISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VR
+FDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV
+QSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA------------------------
+---------------------------------------------------VAKQQK---
+------------------------------------------------------------
+--------------ESESTQK------AEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-
+NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK
+-VDD-GS--ELSSASKTEVSSV---SSV--------------------------------
+--------------------------------------------SPA-------------
+------
> 10== Z68193 1 human Red Opsin <>[]
-----------------------------------------------------------MA
-QQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNS-TRGPFEGPNY-HIAPRWV--------
--YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STIS
-IVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDA
-KLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSY
-MIVLMVTCCIIPLAIIMLCYLQVWLAI---------------------------------
-------------------------------------------------------------
-------------------------------RAVAKQQK----------ESESTQKA----
--------------------------EKEVTRMVVVMIFAYCVCWGPYTFFACFAAANPGY
-A---FHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF-----------GKKVDD
-GSELSSASKTEVSSV---SSVSPA------------------------------------
----------------------------------------------------
+-------------------MAQQWSL----------QRLAGRHPQDSYEDST--------
+QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV-----
+----YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S
+TISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VR
+FDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGV
+QSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA------------------------
+---------------------------------------------------VAKQQK---
+------------------------------------------------------------
+--------------ESESTQK------AEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-
+NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF----------GKK
+-VDD-GS--ELSSASKTEVSSV---SSV--------------------------------
+--------------------------------------------SPA-------------
+------
> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
-----------------------------------------------------------MT
-EAWNVAVFAARRSRDD-DDTTRGSVFTYTNTNN-TRGPFEGPNY-HIAPRWV--------
--YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STIS
-VFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDS
-KLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSF
-MLTLMITCCFLPLFIIIVCYLQVWMAI---------------------------------
-------------------------------------------------------------
-------------------------------RAVAAQQK----------ESESTQKA----
--------------------------EREVSRMVVVMIVAFCICWGPYASFVSFAAANPGY
-A---FHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF-----------GKKVDD
-GSEASTTSRTEVSSVS-NSSVAPA------------------------------------
----------------------------------------------------
+-------------------MTEAWNV----------AVFAARRSRDD-DDTT--------
+RGSV-FTY-TNTNN-TRGPF----------------EGPNY-------HIAPRWV-----
+----YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-S
+TISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IK
+FDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGC
+QSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA------------------------
+---------------------------------------------------VAAQQK---
+------------------------------------------------------------
+--------------ESESTQK------AEREVSRMVVVMIVAFCICWGPYASFVSFAAA-
+NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF----------GKK
+-VDD-GS--EASTTSRTEVSSVS-NSSV--------------------------------
+--------------------------------------------APA-------------
+------
> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
------------------------------------------------------------M
-AAWEAAFAARRRHEE--EDTTRDSVFTYTNSNN-TRGPFEGPNY-HIAPRWV--------
--YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STIS
-VINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDG
-KLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSY
-MVVLMVTCCFFPLAIIILCYLQVWLAI---------------------------------
-------------------------------------------------------------
-------------------------------RAVAAQQK----------ESESTQKA----
--------------------------EKEVSRMVVVMIVAYCFCWGPYTFFACFAAANPGY
-A---FHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF-----------GKKVDD
-GSEVST-SRTEVSSVS-NSSVSPA------------------------------------
----------------------------------------------------
+-------------------MA-AWEA----------AFAARRRHEE--EDTT--------
+RDSV-FTY-TNSNN-TRGPF----------------EGPNY-------HIAPRWV-----
+----YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-S
+TISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IK
+FDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGV
+QSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA------------------------
+---------------------------------------------------VAAQQK---
+------------------------------------------------------------
+--------------ESESTQK------AEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-
+NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF----------GKK
+-VDD-GS--EVST-SRTEVSSVS-NSSV--------------------------------
+--------------------------------------------SPA-------------
+------
> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-------------------------------------------------------------
----------------------MSSNSSQAPPNG-TPGPFDGPQWPYQAPQST--------
--YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVS
-LSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQR
-RHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--NSY
-ILSLFVTCFVLPLSLILFSYTNLLLTL---------------------------------
-------------------------------------------------------------
-------------------------------RAAAAQQK----------EADTTQRA----
--------------------------EREVTRMVIVMVMAFLLCWLPYSTFALVVATHKGI
-I---IQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCCGYQ------PQRTGK
-ASPGTPGPHADVTAAGLRNKVMPAHPV---------------------------------
----------------------------------------------------
+-------------------MS---------------------------SNSS--------
+QAP--------PNG-TPGPF----------------DGPQW------PYQAPQST-----
+----YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-S
+SVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQ
+FQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN--
+NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA------------------------
+---------------------------------------------------AAAQQK---
+------------------------------------------------------------
+--------------EADTTQR------AEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-
+HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCGY-----QPQR
+-TGK-AS--PGTPGPHADVTAAGLRNKV--------------------------------
+--------------------------------------------MPAHP---V-------
+------
> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-------------------------------------------------------MESGNV
-SSSLFGNVSTALRPEARLSA---E-----TRLLGWNVPPEELR--HIPEHWLTYPEPPES
-MNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIF
-IYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTH
-GKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLF
-VACIFFFSFVCPTTMITYYYSQIVGHVFSH------------------------------
-------------------------------------------------------------
------------------------------EKALRDQAKKMNVESLRSNVDKNKETA----
--------------------------EIRIAKAAITICFLFFCSWTPYGVMSLIGAFGDKT
-L---LTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------NEK
-APESSAVASTSTTQEPQQTTAA--------------------------------------
----------------------------------------------------
+----------MESGNVSSSLFGNVST----------ALRPEARL----SA----------
+-ETRLLGW--------NVPP----------------EELR--------HIPEHWLTYPEP
+PESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--T
+PIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--K
+MTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDT
+RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------
+---------------------------------------------------LRDQAKKM-
+--------------------------------NVESL-----------------------
+-----------RSNVDKNKET------AEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-
+GDKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL--------N
+EKAP-ES-SAVASTSTTQEP-QQ-TTAA--------------------------------
+------------------------------------------------------------
+------
> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-------------------------------------------------------MEYHNV
-SSVL-GNVSSVLRPDARLSA---E-----SRLLGWNVPPDELR--HIPEHWLIYPEPPES
-MNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIF
-IYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTH
-GKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLF
-VACIFFFSFVCPTTMITYYYSQIVGHVFSH------------------------------
-------------------------------------------------------------
------------------------------EKALRDQAKKMNVDSLRSNVDKSKEAA----
--------------------------EIRIAKAAITICFLFFASWTPYGVMSLIGAFGDKT
-L---LTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------SEK
-APESRAAISTSTTQEQQQTTAA--------------------------------------
----------------------------------------------------
+----------MEYHNVSSVL-GNVSS----------VLRPDARL----SA----------
+-ESRLLGW--------NVPP----------------DELR--------HIPEHWLIYPEP
+PESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--T
+PIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--K
+MTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDT
+RLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA------------------------
+---------------------------------------------------LRDQAKKM-
+--------------------------------NVDSL-----------------------
+-----------RSNVDKSKEA------AEIRIAKAAITICFLFFASWTPYGVMSLIGAF-
+GDKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI--------S
+EKAP-ES-RAAISTSTTQEQ-QQ-TTAA--------------------------------
+------------------------------------------------------------
+------
> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-------------------------------------------------------ME----
---PLCNASEPPLRPEAR-SSGNGD-----LQFLGWNVPPDQIQ--YIPEHWLTQLEPPAS
-MHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF
---NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTF
-TKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLF
-VGTIFFFSFVCPTLMILYYYSQIVGHVFSH------------------------------
-------------------------------------------------------------
------------------------------EKALREQAKKMNVESLRSNVDKSKETA----
--------------------------EIRIAKAAITICFLFFVSWTPYGVMSLIGAFGDKS
-L---LTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------NEK
-SGEISSAQSTTTQEQ-QQTTAA--------------------------------------
----------------------------------------------------
+----------ME------PLCNASEP----------PLRPEAR-----SSGN--------
+GDLQFLGW--------NVPP----------------DQIQ--------YIPEHWLTQLEP
+PASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A
+PIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N
+MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT
+RLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA------------------------
+---------------------------------------------------LREQAKKM-
+--------------------------------NVESL-----------------------
+-----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-
+GDKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV--------N
+EKSG-EI-SSAQST-TTQEQ-QQ-TTAA--------------------------------
+------------------------------------------------------------
+------
> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-------------------------------------------------------MD----
---ALCNASEPPLRPEARMSSGSDE-----LQFLGWNVPPDQIQ--YIPEHWLTQLEPPAS
-MHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF
-IYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTF
-TKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLF
-VGTIFLFSFVVPTLMILYYYSQIVGHVFNH------------------------------
-------------------------------------------------------------
------------------------------EKALREQAKKMNVESLRSNVDKSKETA----
--------------------------EIRIAKAAITICFLFFVSWTPYGVMSLIGAFGDKS
-L---LTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------NEK
-SGEASSAQSTTTQEQTQQTSAA--------------------------------------
----------------------------------------------------
+----------MD------ALCNASEP----------PLRPEARM----SSGS--------
+DELQFLGW--------NVPP----------------DQIQ--------YIPEHWLTQLEP
+PASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A
+PIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--N
+MTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDT
+RLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA------------------------
+---------------------------------------------------LREQAKKM-
+--------------------------------NVESL-----------------------
+-----------RSNVDKSKET------AEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-
+GDKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV--------N
+EKSG-EA-SSAQST-TTQEQTQQ-TSAA--------------------------------
+------------------------------------------------------------
+------
> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
-------------------------------------------------------------
----MTNATGPQMAYYGAASMDFGYPE---GVSIVDFVRPEIKP--YVHQHWYNYPPVNPM
-WHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFF
-TYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTT
-GKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSY
-NIFIFVFDYFLPAAIIVFSYVFIVKAIFAH------------------------------
-------------------------------------------------------------
------------------------------EAAMRAQAKKMNVSTLRS-NEADAQRA----
--------------------------EIRIAKTALVNVSLWFICWTPYALISLKGVMGDTS
-G---ITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HETETK
-SNDDSQSNSTVAQDKA--------------------------------------------
----------------------------------------------------
+-------------------MTNATGP----------QMAYYGAA----SMD---------
+-----FGYPEGVSIVDFVRP----------------EIKP--------YVHQHWYNYPPV
+NPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-V
+PFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PK
+LTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNT
+FSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA------------------------
+---------------------------------------------------MRAQAKKM-
+--------------------------------NVSTL-----------------------
+-----------RS-NEADAQR------AEIRIAKTALVNVSLWFICWTPYALISLKGVM-
+GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HET
+ETKS-ND-DSQSNSTVAQDKA---------------------------------------
+------------------------------------------------------------
+------
> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
-------------------------------------------------------------
----MANVTGPQMAFYGSGAATFGYPE---GMTVADFVPDRVKH--MVLDHWYNYPPVNPM
-WHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPF
-CYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQ
-GKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITY
-NICIFIFDFFLPASVIVFSYVFIVKAIFAH------------------------------
-------------------------------------------------------------
------------------------------EAAMRAQAKKMNVTNLRS-NEAETQRA----
--------------------------EIRIAKTALVNVSLWFICWTPYAAITIQGLLGNAE
-G---ITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HEKDPN
-DVEENQSSNTQTQEKS--------------------------------------------
----------------------------------------------------
+-------------------MANVTGP----------QMAFYGSG----AAT---------
+-----FGYPEGMTVADFVPD----------------RVKH--------MVLDHWYNYPPV
+NPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-F
+PPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PK
+LTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNT
+ITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA------------------------
+---------------------------------------------------MRAQAKKM-
+--------------------------------NVTNL-----------------------
+-----------RS-NEAETQR------AEIRIAKTALVNVSLWFICWTPYAAITIQGLL-
+GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HEK
+DPND-VE-ENQSSNTQTQEKS---------------------------------------
+------------------------------------------------------------
+------
> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
----------------------------------------------------------MES
-FAVAAAQLGPHFA-----PLS--------NGSVVDKVTPDMAH--LISPYWNQFPAMDPI
-WAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMM
-GINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTI
-PLALGKM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSY
-LIFYSIFVYYIPLFLICYSYWFIIAAVSAH------------------------------
-------------------------------------------------------------
------------------------------EKAMREQAKKMNVKSLRS-SEDAEKSA----
--------------------------EGKLAKVALVTITLWFMAWTPYLVINCMGLFKF-E
-G---LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKVDDG
-K-SSDAQSQATASEAESKA-----------------------------------------
----------------------------------------------------
+----------ME---SFAVAAAQLGP----------HFAPLS------------------
+----------NGSVVDKVTP----------------DMAH--------LISPYWNQFPAM
+DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T
+PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP
+MTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--RDWNP
+RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------
+---------------------------------------------------MREQAKKM-
+--------------------------------NVKSL-----------------------
+-----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-
+KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV
+DDGK-SS-DAQSQATASEAESKA-------------------------------------
+------------------------------------------------------------
+------
> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
----------------------------------------------------------MES
-FAVAAAQLGPHFA-----PLS--------NGSVVDKVTPDMAH--LISPYWNQFPAMDPI
-WAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMM
-GINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTI
-PLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSY
-LIFYSIFVYYIPLFLICYSYWFIIAAVSAH------------------------------
-------------------------------------------------------------
------------------------------EKAMREQAKKMNVKSLRS-SEDAEKSA----
--------------------------EGKLAKVALVTITLWFMAWTPYLVINCMGLFKF-E
-G---LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKVDDG
-K-SSDAQSQATASEAESKA-----------------------------------------
----------------------------------------------------
+----------ME---SFAVAAAQLGP----------HFAPLS------------------
+----------NGSVVDKVTP----------------DMAH--------LISPYWNQFPAM
+DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T
+PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP
+MTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP
+RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------
+---------------------------------------------------MREQAKKM-
+--------------------------------NVKSL-----------------------
+-----------RS-SEDAEKS------AEGKLAKVALVTITLWFMAWTPYLVINCMGLF-
+KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV
+DDGK-SS-DAQSQATASEAESKA-------------------------------------
+------------------------------------------------------------
+------
> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
----------------------------------------------------------MDS
-FAAVATQLGPQFA-----APS--------NGSVVDKVTPDMAH--LISPYWDQFPAMDPI
-WAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMM
-GINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTI
-PLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSY
-LIFYSIFVYYIPLFLICYSYWFIIAAVSAH------------------------------
-------------------------------------------------------------
------------------------------EKAMREQAKKMNVKSLRS-SEDADKSA----
--------------------------EGKLAKVALVTISLWFMAWTPYLVINCMGLFKF-E
-G---LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKVDDG
-K-SSEAQSQATTSEAESKA-----------------------------------------
----------------------------------------------------
+----------MD---SFAAVATQLGP----------QFAAPS------------------
+----------NGSVVDKVTP----------------DMAH--------LISPYWDQFPAM
+DPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T
+PMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RP
+MTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNP
+RSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA------------------------
+---------------------------------------------------MREQAKKM-
+--------------------------------NVKSL-----------------------
+-----------RS-SEDADKS------AEGKLAKVALVTISLWFMAWTPYLVINCMGLF-
+KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKV
+DDGK-SS-EAQSQATTSEAESKA-------------------------------------
+------------------------------------------------------------
+------
> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-----------------------------------------------------MERSHLPE
-TPFDLAHSGPRFQ-----AQSSG------NGSVLDNVLPDMAH--LVNPYWSRFAPMDPM
-MSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVM
-IINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTI
-KTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSY
-LITYSLFVYYTPLFLICYSYWFIIAAVAAH------------------------------
-------------------------------------------------------------
------------------------------EKAMREQAKKMNVKSLRS-SEDCDKSA----
--------------------------EGKLAKVALTTISLWFMAWTPYLVICYFGLFKI-D
-G---LTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GNTDEP
-KPDAPASDTETTSEADSKA-----------------------------------------
----------------------------------------------------
+-----MERSHLP---ETPFDLAHSGP----------RFQAQSSG----------------
+----------NGSVLDNVLP----------------DMAH--------LVNPYWSRFAPM
+DPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S
+PVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP
+MTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNP
+RSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA------------------------
+---------------------------------------------------MREQAKKM-
+--------------------------------NVKSL-----------------------
+-----------RS-SEDCDKS------AEGKLAKVALTTISLWFMAWTPYLVICYFGLF-
+KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GNT
+DEPKPDA-PASDTETTSEADSKA-------------------------------------
+------------------------------------------------------------
+------
> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-----------------------------------------------------MERSLLPE
-PPLAMALLGPRFE-----AQTGG------NRSVLDNVLPDMAP--LVNPHWSRFAPMDPT
-MSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVM
-IINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTI
-KTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSY
-LITYSLFVYYTPLFMICYSYWFIIATVAAH------------------------------
-------------------------------------------------------------
------------------------------EKAMRDQAKKMNVKSLRS-SEDCDKSA----
--------------------------ENKLAKVALTTISLWFMAWTPYLIICYFGLFKI-D
-G---LTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GTTDEP
-KPDAPPSDTETTSEAESKD-----------------------------------------
----------------------------------------------------
+-----MERSLLP---EPPLAMALLGP----------RFEAQTGG----------------
+----------NRSVLDNVLP----------------DMAP--------LVNPHWSRFAPM
+DPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S
+PVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TP
+MTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNP
+RSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA------------------------
+---------------------------------------------------MRDQAKKM-
+--------------------------------NVKSL-----------------------
+-----------RS-SEDCDKS------AENKLAKVALTTISLWFMAWTPYLIICYFGLF-
+KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GTT
+DEPKPDA-PPSDTETTSEAESKD-------------------------------------
+------------------------------------------------------------
+------
> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-------------------------------------------------------------
----MIAVSGPSYE-----AFSYGGQARFNNQTVVDKVPPDMLH--LIDANWYQYPPLNPM
-WHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPM
-VINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSI
-NGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASY
-LVCYGIWVYFVPLFLIIYSYWFIIQAVAAH------------------------------
-------------------------------------------------------------
------------------------------EKNMREQAKKMNVASLRS-SENQNTSA----
--------------------------ECKLAKVALMTISLWFMAWTPYLVINFSGIFNL-V
-K---ISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------AAEPS
-SDAVSTTSGTTTVTDNEKSNA---------------------------------------
----------------------------------------------------
+-------------------MIAVSGP----------SYEAFSYG----GQA---------
+----RF---NNQTVVDKVPP----------------DMLH--------LIDANWYQYPPL
+NPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-S
+PPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KP
+LSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLS
+ASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN------------------------
+---------------------------------------------------MREQAKKM-
+--------------------------------NVASL-----------------------
+-----------RS-SENQNTS------AECKLAKVALMTISLWFMAWTPYLVINFSGIF-
+NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------AA
+EPSS-DA-VSTTSGTTTVTDNEK-SNA---------------------------------
+------------------------------------------------------------
+------
> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-------------------------------------------------------------
------MANQLSYS-----SLGWPYQP---NASVVDTMPKEMLY--MIHEHWYAFPPMNPL
-WYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTM
-TSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTH
-KKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASY
-VVIYGLAVYFLPLITMIYCYFFIVHAVAEH------------------------------
-------------------------------------------------------------
------------------------------EKQLREQAKKMNVASLRANADQQKQSA----
--------------------------ECRLAKVAMMTVGLWFMAWTPYLIISWAGVFSSGT
-R---LTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GSGESG
-SDVKSEASATTTMEEKPKIPEA--------------------------------------
----------------------------------------------------
+---------------------MANQL----------SYSSLGWP----YQP---------
+----------NASVVDTMPK----------------EMLY--------MIHEHWYAFPPM
+NPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-M
+PTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-AP
+LTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSS
+ASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ------------------------
+---------------------------------------------------LREQAKKM-
+--------------------------------NVASL-----------------------
+-----------RANADQQKQS------AECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-
+SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GSG
+ESGS-DV-KSEASATTTMEEKPK-IPEA--------------------------------
+------------------------------------------------------------
+------
> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
-------------------------------------------------------------
-----------MVESTTLVNQTWWY-----NPTVD------------IHPHWAKFDPIPDA
-VYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLK
-TISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSH
-RRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSF
-ILCMYFCGFMLPIIIIAFCYFNIVMSVSNH------------------------------
-------------------------------------------------------------
------------------------------EKEMAAMAKRLNAKELR--KAQAGASA----
--------------------------EMKLAKISMVIITQFMLSWSPYAIIALLAQFGPAE
-W---VTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDA
-NDAEEEVVASER--GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PPQGYPPQ
-GAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
+------------------------------------MVESTTLV----NQT---------
+-----WWY--NPTVD----------------------------------IHPHWAKFDPI
+PDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGF
+PLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKK
+MSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPST
+RSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE------------------------
+---------------------------------------------------MAAMAKRL-
+--------------------------------NAKEL-----------------------
+-----------R--KAQAGAS------AEMKLAKISMVIITQFMLSWSPYAIIALLAQF-
+GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKEC
+EDAN-DA-EEEVVASER--GGES-RDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PP
+QGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQG---VDNQAYQA
+------
> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
-------------------------------------------------------------
------------MGRDLRDNETWWY-----NPSIV------------VHPHWREFDQVPDA
-VYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLM
-TISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSH
-RRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSN
-ILCMFILGFFGPILIIFFCYFNIVMSVSNH------------------------------
-------------------------------------------------------------
------------------------------EKEMAAMAKRLNAKELR--KAQAGANA----
--------------------------EMRLAKISIVIVSQFLLSWSPYAVVALLAQFGPLE
-W---VTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDD
-KDAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPPQGYPPQ
-GY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA
+-------------------------------------MGRDLRD----NET---------
+-----WWY--NPSIV----------------------------------VHPHWREFDQV
+PDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGF
+PLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKK
+MSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTT
+RSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE------------------------
+---------------------------------------------------MAAMAKRL-
+--------------------------------NAKEL-----------------------
+-----------R--KAQAGAN------AEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-
+GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKET
+EDDK-DA-ETEIPAGESSDAAPS-ADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPP
+QGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQG---VDNQAYQA
+------
> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
----------MMDVNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDGGADPVAGSWAPHLLS-
---EVTASPAPTW------------DAPPDNASGCGEQIN-----------------YGRV
-EKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFV
-SVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNG
-KCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GY
-TIYSTAVAFYIPMSVMLFMYYQIYKAAR--------------------------------
-------------------KSAAKHKFPGFPRVEPDSVIAL--------------------
------------------------------NGIVKLQKEVEECANLSRLL-----------
--------------KHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGT
-SCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMH
-EALKLAERPERPEFVLQNADYCRKKGHDS-------------------------------
----------------------------------------------------
+---------MMD-------VNSSGRPDLYGHLRSF-LLPEVGRGLPDLSPDGGADPVAGS
+WAPHLLS---EVTASPAPTW----------------DAPPDNASGCGEQIN--------Y
+GRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-M
+PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR
+QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF---------
+-GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF---------------------
+----------------------------------P--------GFPR----VEPDS----
+---VIAL-----------------NGIVKLQ--------KEVEECAN-------------
+-----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI
+CGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR---------
+-----NINRKLSAAGMHEALKLA-------------------------------------
+-------------------------------------------ERPERPEFVLQNADYCR
+KKGHDS
> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
------------------------------------------------------MPHLLSG
-FLEVTASPAPTW------------DAPPDNVSGCGEQIN-----------------YGRV
-EKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFV
-SVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNG
-KCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GY
-TIYSTAVAFYIPMSVMLFMYYQIYKAAR--------------------------------
-------------------KSAAKHKFPGFPRVQPESVISL--------------------
------------------------------NGVVKLQKEVEECANLSRLL-----------
--------------KHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGT
-SCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYRNINRKLSAAGMH
-EALKLAERPERSEFVLQNSDHCGKKGHDT-------------------------------
----------------------------------------------------
+------------------------------------------------------------
+-MPHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN--------Y
+GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M
+PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR
+QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF---------
+-GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF---------------------
+----------------------------------P--------GFPR----VQPES----
+---VISL-----------------NGVVKLQ--------KEVEECAN-------------
+-----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI
+CGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYR---------
+-----NINRKLSAAGMHEALKLA-------------------------------------
+-------------------------------------------ERPERSEFVLQNSDHCG
+KKGHDT
> 31=p A47425 serotonin receptor 5HT-7 - rat
------------------------------------------------------MPHLLSG
-FLEVTASPAPTW------------DAPPDNVSGCGEQIN-----------------YGRV
-EKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFV
-SVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNG
-KCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GY
-TIYSTAVAFYIPMSVMLFMYYQIYKAAR--------------------------------
-------------------KSAAKHKFPGFPRVQPESVISL--------------------
------------------------------NGVVKLQKEVEECANLSRLL-----------
--------------KHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGT
-SCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMH
-EALKLAERPERSEFVLQNSDHCGKKGHDT-------------------------------
----------------------------------------------------
+------------------------------------------------------------
+-MPHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN--------Y
+GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M
+PFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVR
+QNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF---------
+-GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF---------------------
+----------------------------------P--------GFPR----VQPES----
+---VISL-----------------NGVVKLQ--------KEVEECAN-------------
+-----LSRLLKHERKNISIFK------REQKAATTLGIIVGAFTVCWLPFFLLSTARPFI
+CGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYR---------
+-----NINRKLSAAGMHEALKLA-------------------------------------
+-------------------------------------------ERPERSEFVLQNSDHCG
+KKGHDT
> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-----------MDVLSPGQ-------GN----------------------NTTSPPAPFET
-GGNTTGISDVTV-----------------------------------------------S
-YQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMA
-ALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTP
-RRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH----------GY
-TIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK---------------------------
-------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR----
------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG--PT
-PCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CES
-SCH-MPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRQ----------
-------------------------------------------------------------
----------------------------------------------------
+----------MD-------VLSPG------------QGNNTTSPPAPFETGG--------
+----------NTTGISDVTV----------------------------------------
+--SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L
+PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK
+RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH---------
+-GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------
+---------------TVKKVEKTGADTRHGASPAPQPKKS-----------VNGESGSR-
+--------NWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAG-
+-PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-
+CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC---------
+-----RQ-----------------------------------------------------
+------------------------------------------------------------
+------
> 33=p A35181 serotonin receptor class 1A - rat
-----------MDVFSFGQ-------GN----------------------NTTASQEPFGT
-GGNVTSISDVTF-----------------------------------------------S
-YQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMA
-ALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTP
-RRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH----------GY
-TIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK---------------------------
-------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG----
------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG--SN
-SYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CES
-SCH-MPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRR----------
-------------------------------------------------------------
----------------------------------------------------
+----------MD-------VFSFG------------QGNNTTASQEPFGTGG--------
+----------NVTSISDVTF----------------------------------------
+--SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L
+PMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK
+RTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPDA---CTISKDH---------
+-GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------
+---------------TVRKVEKKGAGTSLGTSSAPPPKKS-----------LNGQPGSG-
+--------DWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESG-
+-SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-
+CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFC---------
+-----RR-----------------------------------------------------
+------------------------------------------------------------
+------
> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-MANFTFGDLALDVARMGGLASTPSGLR----------------------STGLTTPGLSP
-TGLVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATN-----MTDDRYWSLTVYSHE
-HLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLS
-VVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSA
-RRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GY
-TIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSV
-VSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-----
------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS----
----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-VDP
-E-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYRRGHR--------
-------------------------------------------------------------
----------------------------------------------------
+MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG--------
+----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY
+SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M
+PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR
+RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK---------
+-GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE
+YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--
+--------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-
+------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-
+VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR---------
+-----RGHR---------------------------------------------------
+------------------------------------------------------------
+------
> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-MANFTFGDLALDVARMGGLASTPSGLR----------------------STGLTTPGLSP
-TGLVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATN-----MTDDRYWSLTVYSHE
-HLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLS
-VVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSA
-RRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GY
-TIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSV
-VSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS-----
------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS----
----------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-VDP
-E-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYRRGHR--------
-------------------------------------------------------------
----------------------------------------------------
+MANFTFGDLALD-------VARMG-----GLASTPSGLRSTGLTTPGLSPTG--------
+----------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVY
+SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M
+PLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRR
+RSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK---------
+-GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTE
+YSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSS--
+--------SERLKQIQIETAEAFANGCA----EEASIAMLERQ-CNNGKKISSNDTPYS-
+------------RTREKLELK------RERKAARTLAIITGAFLICWLPFFIIALIGPF-
+VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR---------
+-----RGHR---------------------------------------------------
+------------------------------------------------------------
+------
> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
--MEGAEGQEELDWEAL--YLRLP--LQ----------------------NCSWNSTGWEP
-NWNVTVVPNTTW---------WQASAPFDTPAALVRAAAK--------------------
------AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLG
-AVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTA
-KRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GY
-QIFATASSFYVPVLIILILYWRIYQTARKRIR----------------------------
------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGIAAA
-VVAVIGRPLPTISETTTTGFTNVSS----NNTS---PEKQSCANGLEADPPTTGYGAVAA
-AYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-CDC
-E---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRVRRRRAPQ-----
-------------------------------------------------------------
----------------------------------------------------
+-MEGAEGQEELD-------WEAL-------YLRLP--LQNCSWNSTGWEPNW--------
+----------NVTVVPNTTW---------WQASAPFDTPAALVRAAAK------------
+--------AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-M
+PLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHA
+STAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV---------
+-GYQIFATASSFYVPVLIILILYWRIYQTARKRIR-------------------------
+--------------------RRRGATARGGVGPPP---------VPAGGALVAGGGSGGI
+AAAVVAVIGRPLPTISETTTTGFTNVSS----NNTS---PEKQSCANGLEADPPTTGYGA
+VAAAYYPSLVRRKPKEAADSK------RERKAAKTLAIITGAFVACWLPFFVLAILVPT-
+CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRV---------
+-----RRRRA--------------------------------------------------
+---------------------------------------------PQ-------------
+------
> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-------------------------------------------------------------
----------MNGTE--GDNFYVP----FSNKTGLARSPYEYPQYYLAEPWK---------
-YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTM
-YTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-FRFGNT
-HAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFNNESYV
-VYMFVVHFLVPFVIIFFCYGRLLCTV----------------------------------
-------------------------------------------------------------
--------------------KEAAAAQQ-----------------------------ESAS
-TQKA----------EKEVTRMVVLMVIGFLVCWVPYASVAFYIFT---HQGSD-FGATFM
-TLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCCGKN------PLGDDE--SGASTSKT
-EVSSVS-TSPVSPA----------------------------------------------
------------------------------------------
+--------------------MNGTE--------------------------GDNF-----
+---YVP----F-SNKTGLARSPY----------------EYPQY-------YLAEPWK--
+-------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLF
+G-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN
+-FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPN
+FNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQ---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------ESASTQKAEKEVTRMVVLMVIGFLVCWVPYASV
+AFYIFTHQGS---DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC-----
+--GKNPLGD-DE--SGASTSKTEVSSVS--TSPV--------------------------
+----------------------------------------------SPA-----------
+---
> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
-------------------------------------------------------------
----------MNGTE--GPNFYVP----FSNITGVVRSPFEQPQYYLAEPWQ---------
-FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTL
-YTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-FRFGEN
-HAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVNNESFV
-IYMFVVHFTIPMIVIFFCYGQLVFTV----------------------------------
-------------------------------------------------------------
--------------------KEAAAQQQ-----------------------------ESAT
-TQKA----------EKEVTRMVIIMVIFFLICWLPYASVAMYIFT---HQGSN-FGPIFM
-TLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCCGKN------PLGDDE--ASATASKT
-ETSQ------VAPA----------------------------------------------
------------------------------------------
+--------------------MNGTE--------------------------GPNF-----
+---YVP----F-SNITGVVRSPF----------------EQPQY-------YLAEPWQ--
+-------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFG
+G-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN
+-FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPE
+VNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQ---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------ESATTQKAEKEVTRMVIIMVIFFLICWLPYASV
+AMYIFTHQGS---NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC-----
+--GKNPLGD-DE--ASATASKTE-------TSQV--------------------------
+----------------------------------------------APA-----------
+---
> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
-------------------------------------------------------------
----------MNGTE--GINFYVP----MSNKTGVVRSPFEYPQYYLAEPWK---------
-YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTF
-YTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-FRFSAT
-HAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYHNESYV
-LYMFVIHFIIPVVVIFFSYGRLICKV----------------------------------
-------------------------------------------------------------
--------------------REAAAQQQ-----------------------------ESAT
-TQKA----------EKEVTRMVILMVLGFMLAWTPYAVVAFWIFT---NKGAD-FTATLM
-AVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICCGKN------PFGDEDVSSTVSQSKT
-EVSSVS-SSQVSPA----------------------------------------------
------------------------------------------
+--------------------MNGTE--------------------------GINF-----
+---YVP----M-SNKTGVVRSPF----------------EYPQY-------YLAEPWK--
+-------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACF
+G-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN
+-FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPD
+YHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----REAAAQQQ---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------ESATTQKAEKEVTRMVILMVLGFMLAWTPYAVV
+AFWIFTNKGA---DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC-----
+--GKNPFGD-EDVSSTVSQSKTEVSSVS--SSQV--------------------------
+----------------------------------------------SPA-----------
+---
> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-------------------------------------------------------------
----------MNGTE--GKNFYVP----MSNRTGLVRSPFEYPQYYLAEPWQ---------
-FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTF
-YTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSSS
-HAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYNNESYV
-IYMFVCHFILPVAVIFFTYGRLVCTV----------------------------------
-------------------------------------------------------------
--------------------KAAAAQQQ-----------------------------DSAS
-TQKA----------EREVTKMVILMVFGFLIAWTPYATVAAWIFF---NKGAD-FSAKFM
-AIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFCGKN------PLGDDE-SSTVSTSKT
-EVSS------VSPA----------------------------------------------
------------------------------------------
+--------------------MNGTE--------------------------GKNF-----
+---YVP----M-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ--
+-------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCF
+G-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS
+-FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPD
+YNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KAAAAQQQ---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------DSASTQKAEREVTKMVILMVFGFLIAWTPYATV
+AAWIFFNKGA---DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC-----
+--GKNPLGD-DE-SSTVSTSKTEVSS-------V--------------------------
+----------------------------------------------SPA-----------
+---
> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-------------------------------------------------------------
----------MNGTE--GNNFYVP----LSNRTGLVRSPFEYPQYYLAEPWQ---------
-FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTF
-YTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-FKFSST
-HASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYNNESYV
-LYMFICHFILPVTIIFFTYGRLVCTV----------------------------------
-------------------------------------------------------------
--------------------KAAAAQQQ-----------------------------DSAS
-TQKA----------EREVTKMVILMVLGFLVAWTPYATVAAWIFF---NKGAA-FSAQFM
-AIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFCGKN------PLGDEE-SSTVSTSKT
-EVSS------VSPA----------------------------------------------
------------------------------------------
+--------------------MNGTE--------------------------GNNF-----
+---YVP----L-SNRTGLVRSPF----------------EYPQY-------YLAEPWQ--
+-------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCF
+G-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS
+-FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPE
+YNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KAAAAQQQ---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------DSASTQKAEREVTKMVILMVLGFLVAWTPYATV
+AAWIFFNKGA---AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC-----
+--GKNPLGD-EE-SSTVSTSKTEVSS-------V--------------------------
+----------------------------------------------SPA-----------
+---
> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
-------------------------------------------------------------
----------MKQVPEFHEDFYIPIPLDINNLS--AYSPFLVPQDHLGNQGI---------
-FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSF
-YSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-FTFKTP
-HAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYNNESYV
-MFLFCFCFAVPFGTIVFCYGQLLITL----------------------------------
-------------------------------------------------------------
--------------------KLAAKAQA-----------------------------DSAS
-TQKA----------EREVTKMVVVMVLGFLVCWAPYASFSLWIVS---HRGEE-FDLRMA
-TIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVCGKN-------IEEDE--ASTSSQVT
-QVSS------VAPEK---------------------------------------------
------------------------------------------
+--------------------MKQVPEF------------------------HEDF-----
+---YIPIPLDI-NNLS--AYSPF----------------LVPQD-------HLGNQGI--
+-------FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIF
+G-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN
+-FTFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNK
+YNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KLAAKAQA---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------DSASTQKAEREVTKMVVVMVLGFLVCWAPYASF
+SLWIVSHRGE---EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC-----
+--GKN-IEE-DE--ASTSSQVTQVSS-------V--------------------------
+----------------------------------------------APEK----------
+---
> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-------------------------------------------------------------
----------MRKMS--EEEFYL-----FKNISSV--GPWDGPQYHIAPVWA---------
-FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVF
-VASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-FRFSSK
-HALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYRSESYT
-WFLFIFCFIVPLSLICFSYTQLLRAL----------------------------------
-------------------------------------------------------------
--------------------KAVAAQQQ-----------------------------ESAT
-TQKA----------EREVSRMVVVMVGSFCVCYVPYAAFAMYMVN---NRNHG-LDLRLV
-TIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVCGKA-------MTDES--DTCSSQKT
-EVSTVS-STQVGPN----------------------------------------------
------------------------------------------
+--------------------MRKMS--------------------------EEEF-----
+---YL-----F-KNIS--SVGPW----------------DGPQY-------HIAPVWA--
+-------FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIF
+S-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN
+-FRFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTK
+YRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KAVAAQQQ---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------ESATTQKAEREVSRMVVVMVGSFCVCYVPYAAF
+AMYMVNNRNH---GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC-----
+--GKA-MTD-ES--DTCSSQKTEVSTVS--STQV--------------------------
+----------------------------------------------GPN-----------
+---
> 8=opsin, greensensitive human (fragment) S07060
------------------------------------------------------------
------------------------------------------------------------
-----------------------------------------------DLAETVIA-STISI
-VNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAK
-LAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYM
-IVLMVTCCITPLSIIVLCYLQVWLAI----------------------------------
+-----------------------------------------------------DLAETVI
+A-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN
+-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSY
+PGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RAVAKQQK---------------
+------------------------------------------------------------
------------------------------------------------------------
--------------------RAVAKQQK-----------------------------ESES
-TQKA----------EKEVTRMVVVMVLAFC------------------------------
+---------------------------ESESTQKAEKEVTRMVVVMVLAFC---------
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------
+------------------------------------------------------------
+---
> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-----------------------------------------------------------MA
-QQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNS-TRGPFEGPNYHIAPRWV---------
-YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISV
-VNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-VRFDAK
-LAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYM
-IVLMVTCCITPLSIIVLCYLQVWLAI----------------------------------
-------------------------------------------------------------
--------------------RAVAKQQK-----------------------------ESES
-TQKA----------EKEVTRMVVVMVLAFCFCWGPYAFFACFAAA---NPGYP-FHPLMA
-ALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF-----------GKKVDDGSELSSASKT
-EVSSV---SSVSPA----------------------------------------------
------------------------------------------
+--------------------MAQQWSL------------QRLAGRHPQDSYEDST-----
+---QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV--
+-------YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI
+A-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN
+-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSY
+PGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RAVAKQQK---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------ESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFF
+ACFAAANPGY---PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF------
+--GKK-VDD-GS--ELSSASKTEVSSV----SSV--------------------------
+----------------------------------------------SPA-----------
+---
> 10== Z68193 1 human Red Opsin <>[]
-----------------------------------------------------------MA
-QQWSLQRLAGRHPQDSYEDSTQSSIFTYTNSNS-TRGPFEGPNYHIAPRWV---------
-YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISI
-VNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-VRFDAK
-LAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPGVQSYM
-IVLMVTCCIIPLAIIMLCYLQVWLAI----------------------------------
-------------------------------------------------------------
--------------------RAVAKQQK-----------------------------ESES
-TQKA----------EKEVTRMVVVMIFAYCVCWGPYTFFACFAAA---NPGYA-FHPLMA
-ALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF-----------GKKVDDGSELSSASKT
-EVSSV---SSVSPA----------------------------------------------
------------------------------------------
+--------------------MAQQWSL------------QRLAGRHPQDSYEDST-----
+---QSSI-FTY-TNSNS-TRGPF----------------EGPNY-------HIAPRWV--
+-------YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI
+A-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN
+-VRFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSY
+PGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RAVAKQQK---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------ESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFF
+ACFAAANPGY---AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF------
+--GKK-VDD-GS--ELSSASKTEVSSV----SSV--------------------------
+----------------------------------------------SPA-----------
+---
> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
-----------------------------------------------------------MT
-EAWNVAVFAARRSRD-DDDTTRGSVFTYTNTNN-TRGPFEGPNYHIAPRWV---------
-YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISV
-FNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-IKFDSK
-LAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELGCQSFM
-LTLMITCCFLPLFIIIVCYLQVWMAI----------------------------------
-------------------------------------------------------------
--------------------RAVAAQQK-----------------------------ESES
-TQKA----------EREVSRMVVVMIVAFCICWGPYASFVSFAAA---NPGYA-FHPLAA
-ALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF-----------GKKVDDGSEASTTSRT
-EVSSVS-NSSVAPA----------------------------------------------
------------------------------------------
+--------------------MTEAWNV------------AVFAARRSRDD-DDTT-----
+---RGSV-FTY-TNTNN-TRGPF----------------EGPNY-------HIAPRWV--
+-------YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLV
+A-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN
+-IKFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVE
+LGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RAVAAQQK---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------ESESTQKAEREVSRMVVVMIVAFCICWGPYASF
+VSFAAANPGY---AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF------
+--GKK-VDD-GS--EASTTSRTEVSSVS--NSSV--------------------------
+----------------------------------------------APA-----------
+---
> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
------------------------------------------------------------M
-AAWEAAFAARRRHE--EEDTTRDSVFTYTNSNN-TRGPFEGPNYHIAPRWV---------
-YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISV
-INQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-IKFDGK
-LAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPGVQSYM
-VVLMVTCCFFPLAIIILCYLQVWLAI----------------------------------
-------------------------------------------------------------
--------------------RAVAAQQK-----------------------------ESES
-TQKA----------EKEVSRMVVVMIVAYCFCWGPYTFFACFAAA---NPGYA-FHPLAA
-ALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF-----------GKKVDDGSEVST-SRT
-EVSSVS-NSSVSPA----------------------------------------------
------------------------------------------
+--------------------MA-AWEA------------AFAARRRHEE--EDTT-----
+---RDSV-FTY-TNSNN-TRGPF----------------EGPNY-------HIAPRWV--
+-------YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVI
+A-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN
+-IKFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSD
+PGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RAVAAQQK---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------ESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFF
+ACFAAANPGY---AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF------
+--GKK-VDD-GS--EVST-SRTEVSSVS--NSSV--------------------------
+----------------------------------------------SPA-----------
+---
> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-------------------------------------------------------------
-----------------------------MSSNSSQAPPNGTPGPFDGPQWP--YQAPQST
-YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSL
-SNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-FQFQRR
-HAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGS--NNNSYI
-LSLFVTCFVLPLSLILFSYTNLLLTL----------------------------------
-------------------------------------------------------------
--------------------RAAAAQQK-----------------------------EADT
-TQRA----------EREVTRMVIVMVMAFLLCWLPYSTFALVVAT---HKGII-IQPVLA
-SLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCCGYQ------PQRTGKASPGTPGPHA
-DVTAAGLRNKVMPAHPV-------------------------------------------
------------------------------------------
+--------------------MSSNSSQA-----------------------PPNG-----
+------------------TPGPF----------------DGPQW------PYQAPQST--
+-------YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLC
+G-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD
+-FQFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGS-
+-NNNSYILSLFVTCFVLPLSLILFSYTNLLLTL----RAAAAQQK---------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------EADTTQRAEREVTRMVIVMVMAFLLCWLPYSTF
+ALVVATHKGI---IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCCGYQPQ
+RTGKA--------SPGTPGPHADVTAAGL-RNKV--------------------------
+----------------------------------------------MPAHPV--------
+---
> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-------------------------------------------------------MESGNV
-SSSLFGNVSTALRPEARLSAE--------TRLLGWNVPPEELR-HIPEHWLTYPEPPESM
-NYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFI
-YNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHG
-KAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFV
-ACIFFFSFVCPTTMITYYYSQIVGHVFSH-------------------------------
-------------------------------------------------------------
-------------------EKALRDQAKKMNVESLRS-------------------NVDKN
-KETA----------EIRIAKAAITICFLFFCSWTPYGVMSLIGAF---GDKTL-LTPGAT
-MIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL---------NEKAPESSAVAST
-STTQEPQQTTAA------------------------------------------------
------------------------------------------
+-----MESGNVSS-----SLFGNVSTA-------------LRPEARL----SAET-----
+------RLLGW--------NVPP----------------EELR--------HIPEHWLTY
+PEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK
+--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG
+--KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DN
+FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVESL----------------
+------------------RS------NVDKNKETAEIRIAKAAITICFLFFCSWTPYGVM
+SLIGAFGDKT---LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLAL--
+----NEKAP-ES-SAVASTSTTQEPQQ---TTAA--------------------------
+------------------------------------------------------------
+---
> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-------------------------------------------------------MEYHNV
-S-SVLGNVSSVLRPDARLSAE--------SRLLGWNVPPDELR-HIPEHWLIYPEPPESM
-NYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFI
-YNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHG
-KAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFDTRLFV
-ACIFFFSFVCPTTMITYYYSQIVGHVFSH-------------------------------
-------------------------------------------------------------
-------------------EKALRDQAKKMNVDSLRS-------------------NVDKS
-KEAA----------EIRIAKAAITICFLFFASWTPYGVMSLIGAF---GDKTL-LTPGAT
-MIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI---------SEKAPESRAAIST
-STTQEQQQTTAA------------------------------------------------
------------------------------------------
+-----MEYHNVS------SVLGNVSSV-------------LRPDARL----SAES-----
+------RLLGW--------NVPP----------------DELR--------HIPEHWLIY
+PEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK
+--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG
+--KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DN
+FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVDSL----------------
+------------------RS------NVDKSKEAAEIRIAKAAITICFLFFASWTPYGVM
+SLIGAFGDKT---LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAI--
+----SEKAP-ES-RAAISTSTTQEQQQ---TTAA--------------------------
+------------------------------------------------------------
+---
> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-------------------------------------------------------ME----
---PLCNASEPPLRPEARSSGNGD------LQFLGWNVPPDQIQ-YIPEHWLTQLEPPASM
-HYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF-
--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFT
-KAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFV
-GTIFFFSFVCPTLMILYYYSQIVGHVFSH-------------------------------
-------------------------------------------------------------
-------------------EKALREQAKKMNVESLRS-------------------NVDKS
-KETA----------EIRIAKAAITICFLFFVSWTPYGVMSLIGAF---GDKSL-LTQGAT
-MIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV---------NEKSGEISSAQST
-TTQEQ-QQTTAA------------------------------------------------
------------------------------------------
+-----MEP------------LCNASEP------------PLRPEAR--SSGNGDL-----
+------QFLGW--------NVPP----------------DQIQ--------YIPEHWLTQ
+LEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK
+--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR
+--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DN
+FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVESL----------------
+------------------RS------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM
+SLIGAFGDKS---LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGV--
+----NEKSG-EI-SSAQSTTTQEQQQ----TTAA--------------------------
+------------------------------------------------------------
+---
> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-------------------------------------------------------MD----
---ALCNASEPPLRPEARMSSGSDE-----LQFLGWNVPPDQIQ-YIPEHWLTQLEPPASM
-HYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFI
-YNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFT
-KAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFDTRLFV
-GTIFLFSFVVPTLMILYYYSQIVGHVFNH-------------------------------
-------------------------------------------------------------
-------------------EKALREQAKKMNVESLRS-------------------NVDKS
-KETA----------EIRIAKAAITICFLFFVSWTPYGVMSLIGAF---GDKSL-LTPGAT
-MIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV---------NEKSGEASSAQST
-TTQEQTQQTSAA------------------------------------------------
------------------------------------------
+-----MDA------------LCNASEP------------PLRPEARM-SSGSDEL-----
+------QFLGW--------NVPP----------------DQIQ--------YIPEHWLTQ
+LEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK
+--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR
+--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DN
+FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVESL----------------
+------------------RS------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVM
+SLIGAFGDKS---LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGV--
+----NEKSG-EA-SSAQSTTTQEQTQQ---TSAA--------------------------
+------------------------------------------------------------
+---
> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
-------------------------------------------------------------
----MTNATGPQMAYYGAASMDFGYPE---GVSIVDFVRPEIKP-YVHQHWYNYPPVNPMW
-HYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFT
-YNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-PKLTTG
-KAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFNTFSYN
-IFIFVFDYFLPAAIIVFSYVFIVKAIFAH-------------------------------
-------------------------------------------------------------
-------------------EAAMRAQAKKMNVSTLRS--------------------NEAD
-AQRA----------EIRIAKTALVNVSLWFICWTPYALISLKGVM---GDTSG-ITPLVS
-TLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV------HETETKSNDDSQSNST
-VAQDKA------------------------------------------------------
------------------------------------------
+--------------------MTNATGP------------QMAYYGAA----SMD------
+--------FGYPEGVSIVDFVRP----------------EIKP--------YVHQHWYNY
+PPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTT
+N-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG
+-PKLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QD
+FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVSTL----------------
+------------------RS-------NEADAQRAEIRIAKTALVNVSLWFICWTPYALI
+SLKGVMGDTS---GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCV--
+--HETETKS-ND-DSQSNSTVAQDKA----------------------------------
+------------------------------------------------------------
+---
> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
-------------------------------------------------------------
----MANVTGPQMAFYGSGAATFGYPE---GMTVADFVPDRVKH-MVLDHWYNYPPVNPMW
-HYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFC
-YNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-PKLTQG
-KATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMNTITYN
-ICIFIFDFFLPASVIVFSYVFIVKAIFAH-------------------------------
-------------------------------------------------------------
-------------------EAAMRAQAKKMNVTNLRS--------------------NEAE
-TQRA----------EIRIAKTALVNVSLWFICWTPYAAITIQGLL---GNAEG-ITPLLT
-TLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV------HEKDPNDVEENQSSNT
-QTQEKS------------------------------------------------------
------------------------------------------
+--------------------MANVTGP------------QMAFYGSG----AAT------
+--------FGYPEGMTVADFVPD----------------RVKH--------MVLDHWYNY
+PPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTT
+N-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG
+-PKLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RD
+MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVTNL----------------
+------------------RS-------NEAETQRAEIRIAKTALVNVSLWFICWTPYAAI
+TIQGLLGNAE---GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCV--
+--HEKDPND-VE-ENQSSNTQTQEKS----------------------------------
+------------------------------------------------------------
+---
> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
----------------------------------------------------------MES
-FAVAAAQLGPHFA-----PLS--------NGSVVDKVTPDMAH-LISPYWNQFPAMDPIW
-AKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMG
-INLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIP
-LALGKM---------------------------YVPEGNLTSCGIDYLE--RDWNPRSYL
-IFYSIFVYYIPLFLICYSYWFIIAAVSAH-------------------------------
-------------------------------------------------------------
-------------------EKAMREQAKKMNVKSLRS--------------------SEDA
-EKSA----------EGKLAKVALVTITLWFMAWTPYLVINCMGLF---KF-EG-LTPLNT
-IWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKVDDGK-SSDAQSQA
-TASEAESKA---------------------------------------------------
------------------------------------------
+-----MESFA---------VAAAQLGP------------HFAPLS---------------
+-------------NGSVVDKVTP----------------DMAH--------LISPYWNQF
+PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
+N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
+-RPMTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--RD
+WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVKSL----------------
+------------------RS-------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVI
+NCMGLFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF--
+--GKVDDGK-SS-DAQSQATASEAESKA--------------------------------
+------------------------------------------------------------
+---
> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
----------------------------------------------------------MES
-FAVAAAQLGPHFA-----PLS--------NGSVVDKVTPDMAH-LISPYWNQFPAMDPIW
-AKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMG
-INLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIP
-LALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYL
-IFYSIFVYYIPLFLICYSYWFIIAAVSAH-------------------------------
-------------------------------------------------------------
-------------------EKAMREQAKKMNVKSLRS--------------------SEDA
-EKSA----------EGKLAKVALVTITLWFMAWTPYLVINCMGLF---KF-EG-LTPLNT
-IWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKVDDGK-SSDAQSQA
-TASEAESKA---------------------------------------------------
------------------------------------------
+-----MESFA---------VAAAQLGP------------HFAPLS---------------
+-------------NGSVVDKVTP----------------DMAH--------LISPYWNQF
+PAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
+N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
+-RPMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RD
+WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVKSL----------------
+------------------RS-------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVI
+NCMGLFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF--
+--GKVDDGK-SS-DAQSQATASEAESKA--------------------------------
+------------------------------------------------------------
+---
> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
----------------------------------------------------------MDS
-FAAVATQLGPQFA-----APS--------NGSVVDKVTPDMAH-LISPYWDQFPAMDPIW
-AKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMG
-INLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-RPMTIP
-LALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWNPRSYL
-IFYSIFVYYIPLFLICYSYWFIIAAVSAH-------------------------------
-------------------------------------------------------------
-------------------EKAMREQAKKMNVKSLRS--------------------SEDA
-DKSA----------EGKLAKVALVTISLWFMAWTPYLVINCMGLF---KF-EG-LTPLNT
-IWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF------GKVDDGK-SSEAQSQA
-TTSEAESKA---------------------------------------------------
------------------------------------------
+-----MDSFA---------AVATQLGP------------QFAAPS---------------
+-------------NGSVVDKVTP----------------DMAH--------LISPYWDQF
+PAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
+N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
+-RPMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RD
+WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVKSL----------------
+------------------RS-------SEDADKSAEGKLAKVALVTISLWFMAWTPYLVI
+NCMGLFKF-E---GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVF--
+--GKVDDGK-SS-EAQSQATTSEAESKA--------------------------------
+------------------------------------------------------------
+---
> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-----------------------------------------------------MERSHLPE
-TPFDLAHSGPRFQ-----AQSSG------NGSVLDNVLPDMAH-LVNPYWSRFAPMDPMM
-SKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMI
-INFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIK
-TSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWNPRSYL
-ITYSLFVYYTPLFLICYSYWFIIAAVAAH-------------------------------
-------------------------------------------------------------
-------------------EKAMREQAKKMNVKSLRS--------------------SEDC
-DKSA----------EGKLAKVALTTISLWFMAWTPYLVICYFGLF---KI-DG-LTPLTT
-IWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF------GNTDEPKPDAPASDTE
-TTSEADSKA---------------------------------------------------
------------------------------------------
+MERSHLPETP---------FDLAHSGP------------RFQAQSSG-------------
+-------------NGSVLDNVLP----------------DMAH--------LVNPYWSRF
+APMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS
+Q-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING
+-TPMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RM
+WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVKSL----------------
+------------------RS-------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVI
+CYFGLFKI-D---GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVF--
+--GNTDEPKPDA-PASDTETTSEADSKA--------------------------------
+------------------------------------------------------------
+---
> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-----------------------------------------------------MERSLLPE
-PPLAMALLGPRFE-----AQTGG------NRSVLDNVLPDMAP-LVNPHWSRFAPMDPTM
-SKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMI
-INFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-TPMTIK
-TSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWNPRSYL
-ITYSLFVYYTPLFMICYSYWFIIATVAAH-------------------------------
-------------------------------------------------------------
-------------------EKAMRDQAKKMNVKSLRS--------------------SEDC
-DKSA----------ENKLAKVALTTISLWFMAWTPYLIICYFGLF---KI-DG-LTPLTT
-IWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC------GTTDEPKPDAPPSDTE
-TTSEAESKD---------------------------------------------------
------------------------------------------
+MERSLLPEPP---------LAMALLGP------------RFEAQTGG-------------
+-------------NRSVLDNVLP----------------DMAP--------LVNPHWSRF
+APMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS
+Q-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING
+-TPMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQ
+WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVKSL----------------
+------------------RS-------SEDCDKSAENKLAKVALTTISLWFMAWTPYLII
+CYFGLFKI-D---GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVC--
+--GTTDEPKPDA-PPSDTETTSEAESKD--------------------------------
+------------------------------------------------------------
+---
> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-------------------------------------------------------------
----MIAVSGPSYE-----AFSYGGQARFNNQTVVDKVPPDMLH-LIDANWYQYPPLNPMW
-HGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMV
-INCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-KPLSIN
-GALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLLSASYL
-VCYGIWVYFVPLFLIIYSYWFIIQAVAAH-------------------------------
-------------------------------------------------------------
-------------------EKNMREQAKKMNVASLRS--------------------SENQ
-NTSA----------ECKLAKVALMTISLWFMAWTPYLVINFSGIF---NL-VK-ISPLFT
-IWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC-------AAEPSSDAVSTTSGT
-TTVTDNEKSNA-------------------------------------------------
------------------------------------------
+--------------------MIAVSGP------------SYEAFSYG---GQARF-----
+------------NNQTVVDKVPP----------------DMLH--------LIDANWYQY
+PPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFC
+M-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG
+-KPLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RG
+LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVASL----------------
+------------------RS-------SENQNTSAECKLAKVALMTISLWFMAWTPYLVI
+NFSGIFNL-V---KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLAC--
+---AAEPSS-DA-VSTTSGTTTVTDNEK--SNA---------------------------
+------------------------------------------------------------
+---
> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-------------------------------------------------------------
------MANQLSYS-----SLGWPYQP---NASVVDTMPKEMLY-MIHEHWYAFPPMNPLW
-YSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMT
-SNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-APLTHK
-KATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWSSASYV
-VIYGLAVYFLPLITMIYCYFFIVHAVAEH-------------------------------
-------------------------------------------------------------
-------------------EKQLREQAKKMNVASLRA-------------------NADQQ
-KQSA----------ECRLAKVAMMTVGLWFMAWTPYLIISWAGVF---SSGTR-LTPLAT
-IWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC------GSGESGSDVKSEASAT
-TTMEEKPKIPEA------------------------------------------------
------------------------------------------
+--------------------MAN----------------QLSYSSLG-------------
+--------WPYQPNASVVDTMPK----------------EMLY--------MIHEHWYAF
+PPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAF
+M-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA
+-APLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KD
+WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK--------------
+------------------------------------------------------------
+--------------------------------------MNVASL----------------
+------------------RA------NADQQKQSAECRLAKVAMMTVGLWFMAWTPYLII
+SWAGVFSSGT---RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLAC--
+--GSGESGS-DV-KSEASATTTMEEKPK--------------------------------
+----------------------------------------------IPEA----------
+---
> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
-------------------------------------------------------------
-----------MVESTTLVNQTWWY-----NPTVD-----------IHPHWAKFDPIPDAV
-YYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKT
-ISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHR
-RAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPSTRSFI
-LCMYFCGFMLPIIIIAFCYFNIVMSVSNH-------------------------------
-------------------------------------------------------------
-------------------EKEMAAMAKRLNAKELR---------------------KAQA
-GASA----------EMKLAKISMVIITQFMLSWSPYAIIALLAQF---GPAEW-VTPYAA
-ELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECEDANDAEEEVVAS
-ER--GGESRDAAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PPQGYPPQGAYPPPQGYP
-PQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
+--------------------MVESTTL-----------------------VNQTW-----
+----------W-YNPTV----------------------------------DIHPHWAKF
+DPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAI
+NGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAA
+SKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TD
+PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR--------------
+------------------------------------------------------------
+--------------------------------------LNAKEL----------------
+------------------R--------KAQAGASAEMKLAKISMVIITQFMLSWSPYAII
+ALLAQFGPAE---WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCC
+QFDEKECED-AN-DAEEEVVASERGGES--RDAAQMKEMMAMMQKMQAQQAAYQPPPPPQ
+GY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQA
+YQA
> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
-------------------------------------------------------------
------------MGRDLRDNETWWY-----NPSIV-----------VHPHWREFDQVPDAV
-YYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMT
-ISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHR
-RAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDSTTRSNI
-LCMFILGFFGPILIIFFCYFNIVMSVSNH-------------------------------
-------------------------------------------------------------
-------------------EKEMAAMAKRLNAKELR---------------------KAQA
-GANA----------EMRLAKISIVIVSQFLLSWSPYAVVALLAQF---GPLEW-VTPYAA
-QLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETEDDKDAETEIPAG
-ESSDAAPSADAAQMKEMMAMMQKMQQQQAAY----PPQGYAPPPQGYPPQGY--PPQGYP
-PQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQAYQA
+--------------------MGRDLRD------------------------NETW-----
+----------W-YNPSI----------------------------------VVHPHWREF
+DQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLV
+NGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAA
+SKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RD
+STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR--------------
+------------------------------------------------------------
+--------------------------------------LNAKEL----------------
+------------------R--------KAQAGANAEMRLAKISIVIVSQFLLSWSPYAVV
+ALLAQFGPLE---WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCC
+QFDDKETED-DK-DAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAY----PPQ
+GYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQA
+YQA
> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
----------MMDVNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDGGADPVAGSWAPHLLS-
---EVTASPAPTWDAPPDNASGCGEQINYGRVE----------------------------
-KVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVS
-VTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGK
-CMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYT
-IYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRVEPDSVIAL-----------
-------------------------------------------------------------
-------------------NGIVKLQKEVEECANLSR-------------------LLKHE
-RKNI-----SIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSC-IPLWVE
-RTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMHEALKLAERPE
-RPEFVLQNADYCRKKGHDS-----------------------------------------
------------------------------------------
+--------------------MMDVNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDGGADPV
+AGSWAPHLLS---EVTASPAPTW----------------DAPPDNASGCGEQIN------
+--YGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVA
+V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
+PVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF------
+----GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF-----------
+----------------------PGF-----------------PRVEPDSVIAL-------
+--------NGIVKLQKEV--------------------EECANLSRLLKH----------
+-----------------ER--------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLL
+STARPFICGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNI
+--NRKLSAA-GMHEALKLAERPERPEFVL-QNADYCRK----------------------
+----------------------------KGHDS---------------------------
+---
> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
------------------------------------------------------MPHLLSG
-FLEVTASPAPTWDAPPDNVSGCGEQINYGRVE----------------------------
-KVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVS
-VTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGK
-CMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYT
-IYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRVQPESVISL-----------
-------------------------------------------------------------
-------------------NGVVKLQKEVEECANLSR-------------------LLKHE
-RKNI-----SIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSC-IPLWVE
-RTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYRNINRKLSAAGMHEALKLAERPE
-RSEFVLQNSDHCGKKGHDT-----------------------------------------
------------------------------------------
+--------------------M---------------------------------------
+-----PHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN------
+--YGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA
+V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
+PVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF------
+----GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF-----------
+----------------------PGF-----------------PRVQPESVISL-------
+--------NGVVKLQKEV--------------------EECANLSRLLKH----------
+-----------------ER--------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLL
+STARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQCQYRNI
+--NRKLSAA-GMHEALKLAERPERSEFVL-QNSDHCGK----------------------
+----------------------------KGHDT---------------------------
+---
> 31=p A47425 serotonin receptor 5HT-7 - rat
------------------------------------------------------MPHLLSG
-FLEVTASPAPTWDAPPDNVSGCGEQINYGRVE----------------------------
-KVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVS
-VTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGK
-CMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF----------GYT
-IYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPGFPRVQPESVISL-----------
-------------------------------------------------------------
-------------------NGVVKLQKEVEECANLSR-------------------LLKHE
-RKNI-----SIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSC-IPLWVE
-RTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMHEALKLAERPE
-RSEFVLQNSDHCGKKGHDT-----------------------------------------
------------------------------------------
+--------------------M---------------------------------------
+-----PHLLSGFLEVTASPAPTW----------------DAPPDNVSGCGEQIN------
+--YGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA
+V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
+PVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF------
+----GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF-----------
+----------------------PGF-----------------PRVQPESVISL-------
+--------NGVVKLQKEV--------------------EECANLSRLLKH----------
+-----------------ER--------KNISIFKREQKAATTLGIIVGAFTVCWLPFFLL
+STARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNI
+--NRKLSAA-GMHEALKLAERPERSEFVL-QNSDHCGK----------------------
+----------------------------KGHDT---------------------------
+---
> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-MDVLSPGQGN---------------------------------------NTTSPPAPFET
-GGNTTGISDVTVSY----------------------------------------------
-QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAA
-LYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPR
-RAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTISKDH----------GYT
-IYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------------------------
------------TVKKVEKTGADTRHGASPAPQPKKS----------VNGESGSRNWRLGV
-ESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCA--PASFERKN
-ERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF---CESSCHMPTLLG
-AIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRQ--------------------
-------------------------------------------------------------
------------------------------------------
+-----MDVLSPGQ-------GNNTTSPPA----------PFETGG---------------
+-------------NTTGISDVTV-------------------------------------
+-----SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL
+V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY
+VNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPE-DRSDPDA--CTISKDH------
+----GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK--------------
+-------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNW
+RLGVESKAGGALCANGAVRQG-----------------DDGAALEVIEVHRVGNSKEHLP
+LPSEAGPTPCA--PASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIV
+ALVLPFCESS-C-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRQ
+------------------------------------------------------------
+------------------------------------------------------------
+---
> 33=p A35181 serotonin receptor class 1A - rat
-MDVFSFGQGN---------------------------------------NTTASQEPFGT
-GGNVTSISDVTFSY----------------------------------------------
-QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAA
-LYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPR
-RAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTISKDH----------GYT
-IYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK----------------------------
------------TVRKVEKKGAGTSLGTSSAPPPKKS----------LNGQPGSGDWRRCA
-ENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYA--PACLERKN
-ERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF---CESSCHMPALLG
-AIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRR--------------------
-------------------------------------------------------------
------------------------------------------
+-----MDVFSFGQ-------GNNTTASQE----------PFGTGG---------------
+-------------NVTSISDVTF-------------------------------------
+-----SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL
+V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY
+VNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPE-DRSDPDA--CTISKDH------
+----GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK--------------
+-------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDW
+RRCAENRAVGTPCTNGAVRQG-----------------DDEATLEVIEVHRVGNSKEHLP
+LPSESGSNSYA--PACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPFFIV
+ALVLPFCESS-C-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKFCRR
+------------------------------------------------------------
+------------------------------------------------------------
+---
> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-MANFTFGDLALDVARMGGLASTPSGLR----------------------STGLTTPGLSP
-TGLVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATN----MTDDRYWSLTVYSHEH
-LVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSV
-VSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSAR
-RILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYT
-IFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVV
-SDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQ
-IQIETAEAFANGCA----EEASIAMLERQ-CNNGKK-------------------ISSND
-TPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF---VDPEG-IPPFAR
-SFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYRRGHR------------------
-------------------------------------------------------------
------------------------------------------
+-----MANFTFGDLALDVARMGGLASTPS----------GLRSTGLTTPGLSPTG-----
+-------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSL
+TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL
+V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY
+IRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK------
+----GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEE
+TTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN--
+-------ANGVNSNSSSS-ER-LKQIQIETAEAFANGCAEEASIAMLERQ-CNNGKKISS
+NDTPYS------------RT-------REKLELKRERKAARTLAIITGAFLICWLPFFII
+ALIGPFVDPE---GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--
+------------------------------------------------------------
+-------------------------------------------------RGHR-------
+---
> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-MANFTFGDLALDVARMGGLASTPSGLR----------------------STGLTTPGLSP
-TGLVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATN----MTDDRYWSLTVYSHEH
-LVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSV
-VSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSAR
-RILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK----------GYT
-IFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVV
-SDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQ
-IQIETAEAFANGCA----EEASIAMLERQ-CNNGKK-------------------ISSND
-TPYSRTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF---VDPEG-IPPFAR
-SFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYRRGHR------------------
-------------------------------------------------------------
------------------------------------------
+-----MANFTFGDLALDVARMGGLASTPS----------GLRSTGLTTPGLSPTG-----
+-------------LVTSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSL
+TVYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL
+V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY
+IRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK------
+----GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEE
+TTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPEN--
+-------ANGVNSNSSSS-ER-LKQIQIETAEAFANGCAEEASIAMLERQ-CNNGKKISS
+NDTPYS------------RT-------REKLELKRERKAARTLAIITGAFLICWLPFFII
+ALIGPFVDPE---GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILFGKYR--
+------------------------------------------------------------
+-------------------------------------------------RGHR-------
+---
> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
----------------------------MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEP
-NWNVTVVPNTTWWQ--------------------ASAPFDTPAALVR-------------
-AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGA
-VYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAK
-RVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQ
-IFATASSFYVPVLIILILYWRIYQTARKRIRR----------------------------
------------------RRGATARGGVGPPPVPAGGALVA------GGGSGGIAAAVVAV
-IGRPLPTISETTTTGFTNVSSNNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKP
-KEAA------DSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT---CDCE--VSPVLT
-SLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRVRRRRAPQ---------------
-------------------------------------------------------------
------------------------------------------
+--------------------MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNW-----
+-------------NVTVVPNTTW---------WQASAPFDTPAAL---------------
+-----VRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACL
+V-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDY
+IHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV------
+----GYQIFATASSFYVPVLIILILYWRIY-------QTARKRIRR--------------
+-------------------------------RRGATARGGVGPPPVPAGG-ALVAGGG--
+-------SGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSP---EKQSCANGLEADP
+PTTGYGAVAAAYYPSLVRRKP------KEAADSKRERKAAKTLAIITGAFVACWLPFFVL
+AILVPTCDCE----VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRRVRR
+R-----------------------------------------------------------
+---------------------------------------------RAPQ-----------
+---
> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
---------------------MNGTE--GDNFYVPFSNKTG--LARSP-------------
--------------------------------YEYPQY-YLAEPW----------------
-------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLF
-G-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN
-F-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQCSCGPDYYTLNPN
-FNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA----------------
-------------------------------------------------------------
-------------------------------------------------------------
------STQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT--HQGSD-FGATFMTLPAF
-FAKSSALYNPVIYILMNKQFRNCMITTL-----CCGKNPLGDDE-SG-ASTSKTEVSSVS
-T-----------------------------------------------------------
------------------------------------SPV-------SP-A
+--------------------MN-------------------------GTE-------GDN
+FYVP-----------------------------------------FSNKTG---------
+--LARSPYEYPQY-YLAEPW----------------------KYSALAAYMFFLILVGFP
+VNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTM
+CSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-
+AAPPLVG-WS-----RYIPEGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFF
+CYGRLLCTVKEAAAAQQESA----------------------------------------
+------------------------------------------------------------
+-----------------------------------------STQKAEKEVTRMVVLMVIG
+FLVCWVPYASVAFYIFT-HQGS-D-FGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCM
+ITTL-----CCGKNPLGDDE-SG-ASTSKTEVSSVST-----------------------
+------------------------------------------------------------
+------------SPV-------SP-A
> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
---------------------MNGTE--GPNFYVPFSNITG--VVRSP-------------
--------------------------------FEQPQY-YLAEPW----------------
-------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFG
-G-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN
-F-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQCSCGIDYYTLKPE
-VNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA----------------
-------------------------------------------------------------
-------------------------------------------------------------
------TTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT--HQGSN-FGPIFMTLPAF
-FAKTASIYNPIIYIMMNKQFRNCMLTSL-----CCGKNPLGDDE-AS-ATASKTE-----
-T-----------------------------------------------------------
------------------------------------SQV-------AP-A
+--------------------MN-------------------------GTE-------GPN
+FYVP-----------------------------------------FSNITG---------
+--VVRSPFEQPQY-YLAEPW----------------------QFSMLAAYMFLLIVLGFP
+INFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTG
+CNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-
+AAPPLVG-WS-----RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFF
+CYGQLVFTVKEAAAQQQESA----------------------------------------
+------------------------------------------------------------
+-----------------------------------------TTQKAEKEVTRMVIIMVIF
+FLICWLPYASVAMYIFT-HQGS-N-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCM
+LTSL-----CCGKNPLGDDE-AS-ATASKTE-----T-----------------------
+------------------------------------------------------------
+------------SQV-------AP-A
> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
---------------------MNGTE--GINFYVPMSNKTG--VVRSP-------------
--------------------------------FEYPQY-YLAEPW----------------
-------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACF
-G-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN
-F-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQCSCGPDYYTHNPD
-YHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA----------------
-------------------------------------------------------------
-------------------------------------------------------------
------TTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT--NKGAD-FTATLMAVPAF
-FSKSSSLYNPIIYVLMNKQFRNCMITTI-----CCGKNPFGDEDVSSTVSQSKTEVSSVS
-S-----------------------------------------------------------
------------------------------------SQV-------SP-A
+--------------------MN-------------------------GTE-------GIN
+FYVP-----------------------------------------MSNKTG---------
+--VVRSPFEYPQY-YLAEPW----------------------KYRLVCCYIFFLISTGLP
+INLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVG
+CAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-
+AAPPLFG-WS-----RYMPEGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFF
+SYGRLICKVREAAAQQQESA----------------------------------------
+------------------------------------------------------------
+-----------------------------------------TTQKAEKEVTRMVILMVLG
+FMLAWTPYAVVAFWIFT-NKGA-D-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCM
+ITTI-----CCGKNPFGDEDVSSTVSQSKTEVSSVSS-----------------------
+------------------------------------------------------------
+------------SQV-------SP-A
> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
---------------------MNGTE--GKNFYVPMSNRTG--LVRSP-------------
--------------------------------FEYPQY-YLAEPW----------------
-------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCF
-G-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS
-F-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQCSCGPDYYTLNPD
-YNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA----------------
-------------------------------------------------------------
-------------------------------------------------------------
------STQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF--NKGAD-FSAKFMAIPAF
-FSKSSALYNPVIYVLLNKQFRNCMLTTI-----FCGKNPLGDDE-SSTVSTSKTEVSS--
-------------------------------------------------------------
--------------------------------------V-------SP-A
+--------------------MN-------------------------GTE-------GKN
+FYVP-----------------------------------------MSNRTG---------
+--LVRSPFEYPQY-YLAEPW----------------------QFKILALYLFFLMSMGLP
+INGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTG
+CAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-
+AAPPLFG-WS-----RYIPEGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFF
+TYGRLVCTVKAAAAQQQDSA----------------------------------------
+------------------------------------------------------------
+-----------------------------------------STQKAEREVTKMVILMVFG
+FLIAWTPYATVAAWIFF-NKGA-D-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCM
+LTTI-----FCGKNPLGDDE-SSTVSTSKTEVSS--------------------------
+------------------------------------------------------------
+--------------V-------SP-A
> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
---------------------MNGTE--GNNFYVPLSNRTG--LVRSP-------------
--------------------------------FEYPQY-YLAEPW----------------
-------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCF
-G-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS
-F-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQCSCGPDYYTLNPE
-YNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA----------------
-------------------------------------------------------------
-------------------------------------------------------------
------STQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF--NKGAA-FSAQFMAIPAF
-FSKTSALYNPVIYVLLNKQFRSCMLTTL-----FCGKNPLGDEE-SSTVSTSKTEVSS--
-------------------------------------------------------------
--------------------------------------V-------SP-A
+--------------------MN-------------------------GTE-------GNN
+FYVP-----------------------------------------LSNRTG---------
+--LVRSPFEYPQY-YLAEPW----------------------QFKLLAVYMFFLICLGLP
+INGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTG
+CAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-
+AAPPLVG-WS-----RYIPEGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFF
+TYGRLVCTVKAAAAQQQDSA----------------------------------------
+------------------------------------------------------------
+-----------------------------------------STQKAEREVTKMVILMVLG
+FLVAWTPYATVAAWIFF-NKGA-A-FSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCM
+LTTL-----FCGKNPLGDEE-SSTVSTSKTEVSS--------------------------
+------------------------------------------------------------
+--------------V-------SP-A
> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
---------------------MKQVPEFHEDFYIPIPLDINNLSAYSP-------------
--------------------------------FLVPQD-HLGNQG----------------
-------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIF
-G-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN
-F-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQCSCGPDWYTTNNK
-YNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA----------------
-------------------------------------------------------------
-------------------------------------------------------------
------STQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS--HRGEE-FDLRMATIPSC
-LSKASTVYNPVIYVLMNKQFRSCMM-KM-----VCGKN-IEEDE-AS-TSSQVTQVSS--
-------------------------------------------------------------
--------------------------------------V-------APEK
+--------------------MK-------------------------QVPE-----FHED
+FYIP-----------------------------------------IPLDINN--------
+-LSAYSPFLVPQD-HLGNQG----------------------IFMAMSVFMFFIFIGGAS
+INILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATA
+CKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-
+SLPPLFG-WS-----RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVF
+CYGQLLITLKLAAKAQADSA----------------------------------------
+------------------------------------------------------------
+-----------------------------------------STQKAEREVTKMVVVMVLG
+FLVCWAPYASFSLWIVS-HRGE-E-FDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCM
+M-KM-----VCGKN-IEEDE-AS-TSSQVTQVSS--------------------------
+------------------------------------------------------------
+--------------V-------APEK
> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
---------------------MRKMS--EEEFYL-FKNISS----VGP-------------
--------------------------------WDGPQY-HIAPVW----------------
-------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIF
-S-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN
-F-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQCSCGPDWYTVGTK
-YRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA----------------
-------------------------------------------------------------
-------------------------------------------------------------
------TTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN--NRNHG-LDLRLVTIPSF
-FSKSACIYNPIIYCFMNKQFQACIM-KM-----VCGKA-MTDES-DT-CSSQKTEVSTVS
-S-----------------------------------------------------------
------------------------------------TQV-------GP-N
+--------------------MR-------------------------KMS-------EEE
+FYL------------------------------------------FKNISS---------
+--V--GPWDGPQY-HIAPVW----------------------AFYLQAAFMGTVFLIGFP
+LNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHV
+CALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-
+SIPPFFG-WS-----RFIPEGLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICF
+SYTQLLRALKAVAAQQQESA----------------------------------------
+------------------------------------------------------------
+-----------------------------------------TTQKAEREVSRMVVVMVGS
+FCVCYVPYAAFAMYMVN-NRNH-G-LDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACI
+M-KM-----VCGKA-MTDES-DT-CSSQKTEVSTVSS-----------------------
+------------------------------------------------------------
+------------TQV-------GP-N
> 8=opsin, greensensitive human (fragment) S07060
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------DLAETVI
-A-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN
-V-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSY
-PGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE----------------
------------------------------------------------------------
+-----------------------------DLAETVIA-STISIVNQVS-GYFV--LGHPM
+CVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-
+TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVL
+CYLQVWLAIRAVAKQQKESE----------------------------------------
------------------------------------------------------------
------STQKAEKEVTRMVVVMVLAFC----------------------------------
+-----------------------------------------STQKAEKEVTRMVVVMVLA
+FC----------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
--------------------------------------------------
+--------------------------
> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-MAQQWS-LQRLAGRHPQDSYEDSTQ--SSIFTYTNSNST-----RGP-------------
--------------------------------FEGPNY-HIAPRW----------------
-------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI
-A-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN
-V-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSY
-PGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE----------------
-------------------------------------------------------------
-------------------------------------------------------------
------STQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA--NPGYP-FHPLMAALPAF
-FAKSATIYNPVIYVFMNRQFRNCILQLF-------GKKVDDGSE-LS--SASKTEVSSV-
-------------------------------------------------------------
------------------------------------SSV-------SP-A
+MAQQWS-LQRLAGRHPQDSYED-------------------------STQ-------SSI
+FTYT-----------------------------------------NSNSTR---------
+-----GPFEGPNY-HIAPRW----------------------VYHLTSVWMIFVVIASVF
+TNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPM
+CVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-
+TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVL
+CYLQVWLAIRAVAKQQKESE----------------------------------------
+------------------------------------------------------------
+-----------------------------------------STQKAEKEVTRMVVVMVLA
+FCFCWGPYAFFACFAAA-NPGY-P-FHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCI
+LQLF-------GKKVDDGSE-LS--SASKTEVSSV-------------------------
+------------------------------------------------------------
+------------SSV-------SP-A
> 10== Z68193 1 human Red Opsin <>[]
-MAQQWS-LQRLAGRHPQDSYEDSTQ--SSIFTYTNSNST-----RGP-------------
--------------------------------FEGPNY-HIAPRW----------------
-------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI
-A-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN
-V-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSY
-PGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE----------------
-------------------------------------------------------------
-------------------------------------------------------------
------STQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA--NPGYA-FHPLMAALPAY
-FAKSATIYNPVIYVFMNRQFRNCILQLF-------GKKVDDGSE-LS--SASKTEVSSV-
-------------------------------------------------------------
------------------------------------SSV-------SP-A
+MAQQWS-LQRLAGRHPQDSYED-------------------------STQ-------SSI
+FTYT-----------------------------------------NSNSTR---------
+-----GPFEGPNY-HIAPRW----------------------VYHLTSVWMIFVVTASVF
+TNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPM
+CVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-
+TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIML
+CYLQVWLAIRAVAKQQKESE----------------------------------------
+------------------------------------------------------------
+-----------------------------------------STQKAEKEVTRMVVVMIFA
+YCVCWGPYTFFACFAAA-NPGY-A-FHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCI
+LQLF-------GKKVDDGSE-LS--SASKTEVSSV-------------------------
+------------------------------------------------------------
+------------SSV-------SP-A
> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
-MTEAWNVAVFAARRSRDD--DDTTR--GSVFTYTNTNNT-----RGP-------------
--------------------------------FEGPNY-HIAPRW----------------
-------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLV
-A-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN
-I-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSVE
-LGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE----------------
-------------------------------------------------------------
-------------------------------------------------------------
------STQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA--NPGYA-FHPLAAALPAY
-FAKSATIYNPVIYVFMNRQFRNCIMQLF-------GKKVDDGSE-AS--TTSRTEVSSVS
-N-----------------------------------------------------------
------------------------------------SSV-------AP-A
+MTEAWNVAVFAARRSRDD--DD-------------------------TTR-------GSV
+FTYT-----------------------------------------NTNNTR---------
+-----GPFEGPNY-HIAPRW----------------------VYNLVSFFMIIVVIASCF
+TNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPL
+CVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-
+SAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIV
+CYLQVWMAIRAVAAQQKESE----------------------------------------
+------------------------------------------------------------
+-----------------------------------------STQKAEREVSRMVVVMIVA
+FCICWGPYASFVSFAAA-NPGY-A-FHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCI
+MQLF-------GKKVDDGSE-AS--TTSRTEVSSVSN-----------------------
+------------------------------------------------------------
+------------SSV-------AP-A
> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
-MA-AWE-AAFAARRRHEE--EDTTR--DSVFTYTNSNNT-----RGP-------------
--------------------------------FEGPNY-HIAPRW----------------
-------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVI
-A-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN
-I-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSD
-PGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE----------------
-------------------------------------------------------------
-------------------------------------------------------------
------STQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA--NPGYA-FHPLAAALPAY
-FAKSATIYNPIIYVFMNRQFRNCILQLF-------GKKVDDGSE-VS--T-SRTEVSSVS
-N-----------------------------------------------------------
------------------------------------SSV-------SP-A
+MA-AWE-AAFAARRRHEE--ED-------------------------TTR-------DSV
+FTYT-----------------------------------------NSNNTR---------
+-----GPFEGPNY-HIAPRW----------------------VYNLTSVWMIFVVAASVF
+TNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQIS-GYFI--LGHPM
+CVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-
+TAPPIFG-WS-----RYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIIL
+CYLQVWLAIRAVAAQQKESE----------------------------------------
+------------------------------------------------------------
+-----------------------------------------STQKAEKEVSRMVVVMIVA
+YCFCWGPYTFFACFAAA-NPGY-A-FHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCI
+LQLF-------GKKVDDGSE-VS--T-SRTEVSSVSN-----------------------
+------------------------------------------------------------
+------------SSV-------SP-A
> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-------------------------M--SSNSSQAPPNGT-----PGP-------------
--------------------------------FDGPQWPYQAPQS----------------
-------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLC
-G-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD
-F-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLRTSCGPNWYTGGSN
---NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD----------------
-------------------------------------------------------------
-------------------------------------------------------------
------TTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT--HKGII-IQPVLASLPSY
-FSKTATVYNPIIYVFMNKQFQSCLLEML-----CCGYQPQRTGK-AS--PGTPGPHADVT
-A-----------------------------------------------------------
------------------------------------AGLRNKVMPAHP-V
+-------------------------------------------------M-------SSN
+SSQA-----------------------------------------PPNGTP---------
+-----GPFDGPQWPYQAPQS----------------------TYVGVAVLMGTVVACASV
+VNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRM
+CELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-
+SAPPLLG-WS-----SYVPEGLRTSCGPNWYTGGSN--NNSYILSLFVTCFVLPLSLILF
+SYTNLLLTLRAAAAQQKEAD----------------------------------------
+------------------------------------------------------------
+-----------------------------------------TTQRAEREVTRMVIVMVMA
+FLLCWLPYSTFALVVAT-HKGI-I-IQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCL
+LEML-----CCGYQPQRTGK-AS--PGTPGPHADVTA-----------------------
+------------------------------------------------------------
+------------AGLRNKVMPAHP-V
> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-MESGNVSSSLFGNVSTALRPE----ARLSA---E---TRLLGWNVPP-------------
--------------------------------EELR---HIPEHWLTYPE-----------
---PPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK
---TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG
---KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYLTSCTFDYLTDN--
-FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVESLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-SNVDKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF--GDKTL-LTPGATMIPAC
-ACKMVACIDPFVYAISHPRYRMELQKRCPWL--ALNEKAPESSA----VASTST---TQE
-P-----------------------------------------------------------
------------------------------------QQT-------TA-A
+--------------------MESGNVS------------SSLFGNVSTAL-------RPE
+-----------------------ARLSA---E---------------TRLL---------
+--GWNVPPEELR--HIPEHWLTYPE-------------PPESMNYLLGTLYIFFTLMSML
+GNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMM-VK-TPIFIYNSFH-QGYA--LGHLG
+CQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-
+VVACYTETWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITY
+YYSQIVGHVFSHEKALRDQAKKM------------------NVESLRS------------
+------------------------------------------------------------
+-------------------------------------NVDKNKETAEIRIAKAAITICFL
+FFCSWTPYGVMSLIGAF-GDKT-L-LTPGATMIPACACKMVACIDPFVYAISHPRYRMEL
+QKRCPWL--ALNEKAPESSA----VASTST---TQEP-----------------------
+------------------------------------------------------------
+------------QQT-------TA-A
> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-MEYHNVSSVL-GNVSSVLRPD----ARLSA---E---SRLLGWNVPP-------------
--------------------------------DELR---HIPEHWLIYPE-----------
---PPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK
---TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG
---KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYLTSCTFDYLTDN--
-FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVDSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-SNVDKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF--GDKTL-LTPGATMIPAC
-TCKMVACIDPFVYAISHPRYRMELQKRCPWL--AISEKAPESRA----AISTST---TQE
-Q-----------------------------------------------------------
------------------------------------QQT-------TA-A
+--------------------MEYHNVS------------SVL-GNVSSVL-------RPD
+-----------------------ARLSA---E---------------SRLL---------
+--GWNVPPDELR--HIPEHWLIYPE-------------PPESMNYLLGTLYIFFTVISMI
+GNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMM-IK-TPIFIYNSFH-QGYA--LGHLG
+CQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-
+VVACYTESWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITY
+YYSQIVGHVFSHEKALRDQAKKM------------------NVDSLRS------------
+------------------------------------------------------------
+-------------------------------------NVDKSKEAAEIRIAKAAITICFL
+FFASWTPYGVMSLIGAF-GDKT-L-LTPGATMIPACTCKMVACIDPFVYAISHPRYRMEL
+QKRCPWL--AISEKAPESRA----AISTST---TQEQ-----------------------
+------------------------------------------------------------
+------------QQT-------TA-A
> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-ME------PLCNASEPPLRPE----AR-SSGNGD---LQFLGWNVPP-------------
--------------------------------DQIQ---YIPEHWLTQLE-----------
---PPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK
---APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR
---NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--
-FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNVESLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-SNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF--GDKSL-LTQGATMIPAC
-TCKLVACIDPFVYAISHPRYRLELQKRCPWL--GVNEKSGEISS----AQSTTT---QEQ
-------------------------------------------------------------
------------------------------------QQT-------TA-A
+--------------------ME------------------PLCNASEPPL-------RPE
+-----------------------AR-SSGNGD---------------LQFL---------
+--GWNVPPDQIQ--YIPEHWLTQLE-------------PPASMHYMLGVFYIFLFCASTV
+GNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMC-LK-APIF--NSFH-RGFAIYLGNTW
+CQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-
+VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILY
+YYSQIVGHVFSHEKALREQAKKM------------------NVESLRS------------
+------------------------------------------------------------
+-------------------------------------NVDKSKETAEIRIAKAAITICFL
+FFVSWTPYGVMSLIGAF-GDKS-L-LTQGATMIPACTCKLVACIDPFVYAISHPRYRLEL
+QKRCPWL--GVNEKSGEISS----AQSTTT---QEQ------------------------
+------------------------------------------------------------
+------------QQT-------TA-A
> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-MD------ALCNASEPPLRPE----ARMSSGSDE---LQFLGWNVPP-------------
--------------------------------DQIQ---YIPEHWLTQLE-----------
---PPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK
---APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR
---NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--
-FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNVESLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-SNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF--GDKSL-LTPGATMIPAC
-TCKLVACIEPFVYAISHPRYRMELQKRCPWL--GVNEKSGEASS----AQSTTT---QEQ
-T-----------------------------------------------------------
------------------------------------QQT-------SA-A
+--------------------MD------------------ALCNASEPPL-------RPE
+-----------------------ARMSSGSDE---------------LQFL---------
+--GWNVPPDQIQ--YIPEHWLTQLE-------------PPASMHYMLGVFYIFLFFASTL
+GNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMC-LK-APIFIYNSFH-RGFA--LGNTW
+CQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-
+VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILY
+YYSQIVGHVFNHEKALREQAKKM------------------NVESLRS------------
+------------------------------------------------------------
+-------------------------------------NVDKSKETAEIRIAKAAITICFL
+FFVSWTPYGVMSLIGAF-GDKS-L-LTPGATMIPACTCKLVACIEPFVYAISHPRYRMEL
+QKRCPWL--GVNEKSGEASS----AQSTTT---QEQT-----------------------
+------------------------------------------------------------
+------------QQT-------SA-A
> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
-MT-----------NATGPQMAYYGAASMDFGYPE---GVSIVDFVRP-------------
--------------------------------EIKP---YVHQHWYNYPP-----------
---VNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTT
-N-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG
-P-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGILDSCSYDYLTQD--
-FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNVSTLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-NEADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM--GDTSG-ITPLVSTLPAL
-LAKSCSCYNPFVYAISHPKYRLAITQHLPWF--CVHETETKSND----DSQSNS---TVA
-Q-----------------------------------------------------------
----------------------------------------------DK-A
+--------------------MT-------------------------NAT-------GPQ
+MAYYG-----------------AASMDFGYPE---------------GVSI---------
+--VDFVRPEIKP--YVHQHWYNYPP-------------VNPMWHYLLGVIYLFLGTVSIF
+GNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQY
+CEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-
+ALPPFFG-WG-----NYILEGILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVF
+SYVFIVKAIFAHEAAMRAQAKKM------------------NVSTLRS------------
+------------------------------------------------------------
+--------------------------------------NEADAQRAEIRIAKTALVNVSL
+WFICWTPYALISLKGVM-GDTS-G-ITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAI
+TQHLPWF--CVHETETKSND----DSQSNS---TVAQ-----------------------
+------------------------------------------------------------
+----------------------DK-A
> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
-MA-----------NVTGPQMAFYGSGAATFGYPE---GMTVADFVPD-------------
--------------------------------RVKH---MVLDHWYNYPP-----------
---VNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTT
-N-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG
-P-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGILDSCSYDYFTRD--
-MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNVTNLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-NEAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL--GNAEG-ITPLLTTLPAL
-LAKSCSCYNPFVYAISHPKFRLAITQHLPWF--CVHEKDPNDVE----ENQSSN---TQT
-Q-----------------------------------------------------------
----------------------------------------------EK-S
+--------------------MA-------------------------NVT-------GPQ
+MAFYG-----------------SGAATFGYPE---------------GMTV---------
+--ADFVPDRVKH--MVLDHWYNYPP-------------VNPMWHYLLGVVYLFLGVISIA
+GNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTY
+CEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-
+SLPPFFG-WG-----SYTLEGILDSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVF
+SYVFIVKAIFAHEAAMRAQAKKM------------------NVTNLRS------------
+------------------------------------------------------------
+--------------------------------------NEAETQRAEIRIAKTALVNVSL
+WFICWTPYAAITIQGLL-GNAE-G-ITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAI
+TQHLPWF--CVHEKDPNDVE----ENQSSN---TQTQ-----------------------
+------------------------------------------------------------
+----------------------EK-S
> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
-ME-----SFAVAAAQLGPHFA-----PLS--------NGSVVDKVTP-------------
--------------------------------DMAH---LISPYWNQFPA-----------
---MDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
-N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
-R-PMTIPLALGKM---------------------------YVPEGNLTSCGIDYLERD--
-WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGAC
-FAKSAACYNPIVYGISHPKYRLALKEKCPCC--VFGKVDDGKSS----DAQSQA-TASEA
-E-----------------------------------------------------------
----------------------------------------------SK-A
+--------------------ME-------------------SFAVAAAQL-------GPH
+FA----------------------PLS--------------------NGSV---------
+--VDKVTPDMAH--LISPYWNQFPA-------------MDPIWAKILTAYMIMIGMISWC
+GNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMM
+CDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKM-----------
+----------------YVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICY
+SYWFIIAAVSAHEKAMREQAKKM------------------NVKSLRS------------
+------------------------------------------------------------
+--------------------------------------SEDAEKSAEGKLAKVALVTITL
+WFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL
+KEKCPCC--VFGKVDDGKSS----DAQSQA-TASEAE-----------------------
+------------------------------------------------------------
+----------------------SK-A
> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
-ME-----SFAVAAAQLGPHFA-----PLS--------NGSVVDKVTP-------------
--------------------------------DMAH---LISPYWNQFPA-----------
---MDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
-N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
-R-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNLTSCGIDYLERD--
-WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGAC
-FAKSAACYNPIVYGISHPKYRLALKEKCPCC--VFGKVDDGKSS----DAQSQA-TASEA
-E-----------------------------------------------------------
----------------------------------------------SK-A
+--------------------ME-------------------SFAVAAAQL-------GPH
+FA----------------------PLS--------------------NGSV---------
+--VDKVTPDMAH--LISPYWNQFPA-------------MDPIWAKILTAYMIMIGMISWC
+GNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMM
+CDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-
+CLAPAFG-WS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICY
+SYWFIIAAVSAHEKAMREQAKKM------------------NVKSLRS------------
+------------------------------------------------------------
+--------------------------------------SEDAEKSAEGKLAKVALVTITL
+WFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL
+KEKCPCC--VFGKVDDGKSS----DAQSQA-TASEAE-----------------------
+------------------------------------------------------------
+----------------------SK-A
> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
-MD-----SFAAVATQLGPQFA-----APS--------NGSVVDKVTP-------------
--------------------------------DMAH---LISPYWDQFPA-----------
---MDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
-N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
-R-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNLTSCGIDYLERD--
-WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGAC
-FAKSAACYNPIVYGISHPKYRLALKEKCPCC--VFGKVDDGKSS----EAQSQA-TTSEA
-E-----------------------------------------------------------
----------------------------------------------SK-A
+--------------------MD-------------------SFAAVATQL-------GPQ
+FA----------------------APS--------------------NGSV---------
+--VDKVTPDMAH--LISPYWDQFPA-------------MDPIWAKILTAYMIIIGMISWC
+GNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMM
+CDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWC
+CLAPVFG-WS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICY
+SYWFIIAAVSAHEKAMREQAKKM------------------NVKSLRS------------
+------------------------------------------------------------
+--------------------------------------SEDADKSAEGKLAKVALVTISL
+WFMAWTPYLVINCMGLF-KF-E-G-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLAL
+KEKCPCC--VFGKVDDGKSS----EAQSQA-TTSEAE-----------------------
+------------------------------------------------------------
+----------------------SK-A
> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-MERSHLPETPFDLAHSGPRFQ-----AQSSG------NGSVLDNVLP-------------
--------------------------------DMAH---LVNPYWSRFAP-----------
---MDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS
-Q-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING
-T-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNLTACSIDYMTRM--
-WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF--KI-DG-LTPLTTIWGAT
-FAKTSAVYNPIVYGISHPKYRIVLKEKCPMC--VFGNTDEPKPD----APASDTETTSEA
-D-----------------------------------------------------------
----------------------------------------------SK-A
+--------------------MERSHL--------------PETPFDLAHS-------GPR
+FQ----------------------AQSSG------------------NGSV---------
+--LDNVLPDMAH--LVNPYWSRFAP-------------MDPMMSKILGLFTLAIMIISCC
+GNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLW
+CDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-
+TVMPLIG-WS-----AYVPEGNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICY
+SYWFIIAAVAAHEKAMREQAKKM------------------NVKSLRS------------
+------------------------------------------------------------
+--------------------------------------SEDCDKSAEGKLAKVALTTISL
+WFMAWTPYLVICYFGLF-KI-D-G-LTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVL
+KEKCPMC--VFGNTDEPKPD----APASDTETTSEAD-----------------------
+------------------------------------------------------------
+----------------------SK-A
> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-MERSLLPEPPLAMALLGPRFE-----AQTGG------NRSVLDNVLP-------------
--------------------------------DMAP---LVNPHWSRFAP-----------
---MDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS
-Q-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING
-T-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNLTACSIDYMTRQ--
-WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF--KI-DG-LTPLTTIWGAT
-FAKTSAVYNPIVYGISHPNDRLVLKEKCPMC--VCGTTDEPKPD----APPSDTETTSEA
-E-----------------------------------------------------------
----------------------------------------------SK-D
+--------------------MERSLL--------------PEPPLAMALL-------GPR
+FE----------------------AQTGG------------------NRSV---------
+--LDNVLPDMAP--LVNPHWSRFAP-------------MDPTMSKILGLFTLVILIISCC
+GNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLW
+CDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-
+TIMPLIG-WS-----SYVPEGNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICY
+SYWFIIATVAAHEKAMRDQAKKM------------------NVKSLRS------------
+------------------------------------------------------------
+--------------------------------------SEDCDKSAENKLAKVALTTISL
+WFMAWTPYLIICYFGLF-KI-D-G-LTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVL
+KEKCPMC--VCGTTDEPKPD----APPSDTETTSEAE-----------------------
+------------------------------------------------------------
+----------------------SK-D
> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-MI-----------AVSGPSYE-----AFSYGGQARFNNQTVVDKVPP-------------
--------------------------------DMLH---LIDANWYQYPP-----------
---LNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFC
-M-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG
-K-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNMTACGTDYFNRG--
-LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNVASLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF--NL-VK-ISPLFTIWGSL
-FAKANAVYNPIVYGISHPKYRAALFAKFPSL--AC-AAEPSSDA----VSTTSG-TTTVT
-D-----------------------------------------------------------
-----------------------------------NEK--------SN-A
+--------------------MI-------------------------AVS-------GPS
+YE----------------------AFSYGGQA------------RFNNQTV---------
+--VDKVPPDMLH--LIDANWYQYPP-------------LNPMWHGILGFVIGMLGFVSAM
+GNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLF
+CQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-
+TIAPMFG-WN-----RYVPEGNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIY
+SYWFIIQAVAAHEKNMREQAKKM------------------NVASLRS------------
+------------------------------------------------------------
+--------------------------------------SENQNTSAECKLAKVALMTISL
+WFMAWTPYLVINFSGIF-NL-V-K-ISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAAL
+FAKFPSL--AC-AAEPSSDA----VSTTSG-TTTVTDN----------------------
+------------------------------------------------------------
+------------EK--------SN-A
> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-M-------------ANQLSYS-----SLGWPYQP---NASVVDTMPK-------------
--------------------------------EMLY---MIHEHWYAFPP-----------
---MNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAF
-M-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA
-A-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNLTSCTVDYLTKD--
-WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNVASLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-ANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF--SSGTR-LTPLATIWGSV
-FAKANSCYNPIVYGISHPRYKAALYQRFPSL--ACGSGESGSDV----KSEASA-TTTME
-E-----------------------------------------------------------
-----------------------------------KPKI-------PE-A
+--------------------M---------------------------AN-------QLS
+YS----------------------SLGWPYQP---------------NASV---------
+--VDTMPKEMLY--MIHEHWYAFPP-------------MNPLWYSILGVAMIILGIICVL
+GNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFM
+CEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-
+TILPFFG-WS-----RYVPEGNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIY
+CYFFIVHAVAEHEKQLREQAKKM------------------NVASLRA------------
+------------------------------------------------------------
+-------------------------------------NADQQKQSAECRLAKVAMMTVGL
+WFMAWTPYLIISWAGVF-SSGT-R-LTPLATIWGSVFAKANSCYNPIVYGISHPRYKAAL
+YQRFPSL--ACGSGESGSDV----KSEASA-TTTMEEK----------------------
+------------------------------------------------------------
+------------PKI-------PE-A
> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
-MV-------------ESTTLV-----NQTWWY-----NPTV-------------------
---------------------------------------DIHPHWAKFDP-----------
---IPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAI
-NGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAA
-SKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGILTSCSFDYLSTD--
-PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNAKELR-------
-------------------------------------------------------------
-------------------------------------------------------------
-K-AQ-AGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF--GPAEW-VTPYAAELPVL
-FAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECED----ANDAEEEVVASE
-R--GGESRDAAQMKEMMAMMQKMQAQQAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYP
-PQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQA-------YQ-A
+--------------------MV-------------------------EST-------TL-
+-----------------------VNQTWWY-----------------NPTV---------
+--------------DIHPHWAKFDP-------------IPDAVYYSVGIFIGVVGIIGIL
+GNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVA
+CQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-
+SVGPVFN-WG-----AYVPEGILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAF
+CYFNIVMSVSNHEKEMAAMAKRL------------------NAKELRK------------
+------------------------------------------------------------
+--------------------------------------AQ-AGASAEMKLAKISMVIITQ
+FMLSWSPYAIIALLAQF-GPAE-W-VTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAI
+QTTFPWLLTCCQFDEKECED----ANDAEE-EVVASER--GGESRDAAQMKEMMAMMQKM
+QAQQAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQV
+EAPQGAPPQGVDNQA-------YQ-A
> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
-MG-------------RDLR-D-----NETWWY-----NPSI-------------------
---------------------------------------VVHPHWREFDQ-----------
---VPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLV
-NGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAA
-SKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVLCNCSFDYISRD--
-STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNAKELR-------
-------------------------------------------------------------
-------------------------------------------------------------
-K-AQ-AGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF--GPLEW-VTPYAAQLPVM
-FAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETED----DKDAETEIPAGE
-SSDAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYP
-PQGYPP---PPQGAPPQGAPP------AAPPQGVDNQA-------YQ-A
+--------------------MG-------------------------RDL-------R--
+-----------------------DNETWWY-----------------NPSI---------
+--------------VVHPHWREFDQ-------------VPDAVYYSLGIFIGICGIIGCG
+GNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAA
+CKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-
+AIGPIFG-WG-----AYTLEGVLCNCSFDYISRD--STTRSNILCMFILGFFGPILIIFF
+CYFNIVMSVSNHEKEMAAMAKRL------------------NAKELRK------------
+------------------------------------------------------------
+--------------------------------------AQ-AGANAEMRLAKISIVIVSQ
+FLLSWSPYAVVALLAQF-GPLE-W-VTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAI
+SQTFPWVLTCCQFDDKETED----DKDAET-EIPAGESSDAAPSADAAQMKEMMAMMQKM
+QQQQAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP--
+----AAPPQGVDNQA-------YQ-A
> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
-MMDVNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDG-----------------GADPVAGS
-WAPHLLS------------------------EVTAS---PAPTW---DAPPDNASGCGEQ
-INYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVA
-V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
-PVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLISQDF------
-----GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------PGFP
-RVEPDSVIALNG------------------------------------------------
-----IVKLQKE------------------VEECAN------------------LSRLLKH
-ERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTFLW
-LGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMHEALKLAERPERPEFV
-L-----------------------------------------------------------
------------------------------------QNADYCRKKGHD-S
+--------------------M---------MDV-NSSGRPDLYGHLRSFLLPEVGRGLPD
+LSPDG------------GADPVAGS--------------------------WAPHLLS--
+-EVTAS---------PAPTW---DAPPDNASGCGEQINYGRVEKVVIGSILTLITLLTIA
+GNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFF
+CNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-
+TLPPLFG-WA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLF
+MYYQIYKAARKSAAKHKF--------------PGFPRVEPDSVIALNG------------
+----------------------------------------IVKLQKE-------------
+-----VEECAN------------------LSRLLKHERKNISIFKREQKAATTLGIIVGA
+FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTY
+RSLL-----QCQYRNINRKL----SAAGMHEALKLAER---------------------P
+ERPEFVLQNA--------------------------------------------------
+--------DYCRKKG-------HD-S
> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
------------------------------------------------------------
--MPHLLSGFL---------------------EVTAS---PAPTW---DAPPDNVSGCGEQ
-INYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA
-V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
-PVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLISQDF------
-----GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------PGFP
-RVQPESVISLNG------------------------------------------------
-----VVKLQKE------------------VEECAN------------------LSRLLKH
-ERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLW
-LGYANSLINPFIYAFFNRDLRPTSRSLLQCQYRNINRKLSAAGMHEALKLAERPERSEFV
-L-----------------------------------------------------------
------------------------------------QNSDHCGKKGHD-T
+----------------------------------------------------MPHLLSGF
+LEVTAS---------PAPTW---DAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIA
+GNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFF
+CNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-
+TLPPLFG-WA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLF
+MYYQIYKAARKSAAKHKF--------------PGFPRVQPESVISLNG------------
+----------------------------------------VVKLQKE-------------
+-----VEECAN------------------LSRLLKHERKNISIFKREQKAATTLGIIVGA
+FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTS
+RSLL-----QCQYRNINRKL----SAAGMHEALKLAER---------------------P
+ERSEFVLQNS--------------------------------------------------
+--------DHCGKKG-------HD-T
> 31=p A47425 serotonin receptor 5HT-7 - rat
------------------------------------------------------------
--MPHLLSGFL---------------------EVTAS---PAPTW---DAPPDNVSGCGEQ
-INYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA
-V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
-PVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCLISQDF------
-----GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------PGFP
-RVQPESVISLNG------------------------------------------------
-----VVKLQKE------------------VEECAN------------------LSRLLKH
-ERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLW
-LGYANSLINPFIYAFFNRDLRTTYRSLLQCQYRNINRKLSAAGMHEALKLAERPERSEFV
-L-----------------------------------------------------------
------------------------------------QNSDHCGKKGHD-T
+----------------------------------------------------MPHLLSGF
+LEVTAS---------PAPTW---DAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIA
+GNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFF
+CNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-
+TLPPLFG-WA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLF
+MYYQIYKAARKSAAKHKF--------------PGFPRVQPESVISLNG------------
+----------------------------------------VVKLQKE-------------
+-----VEECAN------------------LSRLLKHERKNISIFKREQKAATTLGIIVGA
+FTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTY
+RSLL-----QCQYRNINRKL----SAAGMHEALKLAER---------------------P
+ERSEFVLQNS--------------------------------------------------
+--------DHCGKKG-------HD-T
> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-MDVLSPG-----------------------------------------------------
---------------------QGNNT------TSPPAPFETGGNTTGISD-----------
---VTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL
-V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY
-VNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH------
-----GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADTRHGASPAPQ
-PKK-----SVNG--ESGSRNWRLGVESKAGGALCANGAVRQGDDGAAL--EVIEVHRVGN
-SKEHLPLPSEAGPTPCAPAS------------------FERKNERNA-------------
-EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCH-MPTLLGAIINW
-LGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKF----------------------------
-------------------------------------------------------------
--------------------------------------------CR---Q
+--------------------MDVLSPG---------------------------------
+---------------------------------------------QGNNT----------
+-TSPPAPF------ETGGNTTGISD-------------VTVSYQVITSLLLGTLIFCAVL
+GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVT
+CDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-
+SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPLLLMLV
+LYGRIFRAARFRIRKTVKKVEKTGADTRHGASPAPQPKK-----SVNG--ESGSRNWRLG
+VESKAGGALCANGAVRQGDDGAAL--EVIEVHRVGNSKEHLPLPSEAGPTPCAPAS----
+--------------FERKNERNA-------------EAKRKMALARERKTVKTLGIIMGT
+FILCWLPFFIVALVLPF-CESSCH-MPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF
+KKII-----KCKF-----------------------------------------------
+------------------------------------------------------------
+----------CR-------------Q
> 33=p A35181 serotonin receptor class 1A - rat
-MDVFSFG-----------------------------------------------------
---------------------QGNNT------TASQEPFGTGGNVTSISD-----------
---VTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL
-V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY
-VNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTPEDRSDPD---ACTISKDH------
-----GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSSAPP
-PKK-----SLNG--QPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATL--EVIEVHRVGN
-SKEHLPLPSESGSNSYAPAC------------------LERKNERNA-------------
-EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCH-MPALLGAIINW
-LGYSNSLLNPVIYAYFNKDFQNAFKKIIKCKF----------------------------
-------------------------------------------------------------
--------------------------------------------CR---R
+--------------------MDVFSFG---------------------------------
+---------------------------------------------QGNNT----------
+-TASQEPF------GTGGNVTSISD-------------VTFSYQVITSLLLGTLIFCAVL
+GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVT
+CDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-
+SIPPMLG-WRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPLLLMLV
+LYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSSAPPPKK-----SLNG--QPGSGDWRRC
+AENRAVGTPCTNGAVRQGDDEATL--EVIEVHRVGNSKEHLPLPSESGSNSYAPAC----
+--------------LERKNERNA-------------EAKRKMALARERKTVKTLGIIMGT
+FILCWLPFFIVALVLPF-CESSCH-MPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAF
+KKII-----KCKF-----------------------------------------------
+------------------------------------------------------------
+----------CR-------------R
> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRD-----------NASANDT------SATNM---TDDRYWSLTV-----------
---YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL
-V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY
-IRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDPNN--DPDKTGTCIISQDK------
-----GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASP
-KTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSS
-SSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS-------------
-RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLW
-LGYFNSLLNPIIYTIFSPEFRSAFQKILFGKY----------------------------
-------------------------------------------------------------
--------------------------------------------RRGH-R
+--------------------MANFTFGDLALDVARMGGLASTPSGLRS-----TGLTTPG
+LSPTGLVTSDFNDSYGLTGQFINGSHSSRSRD-----------NASANDT----------
+-SATNM---------TDDRYWSLTV-------------YSHEHLVLTSVILGLFVLCCII
+GNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEV
+CDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-
+SIPPLFG-WRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMI
+IYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKK
+RRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEA
+SIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAIITGA
+FLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF
+QKIL-----FGKY-----------------------------------------------
+------------------------------------------------------------
+----------RR----------GH-R
> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRD-----------NASANDT------SATNM---TDDRYWSLTV-----------
---YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL
-V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY
-IRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDPNN--DPDKTGTCIISQDK------
-----GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASP
-KTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSS
-SSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS-------------
-RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLW
-LGYFNSLLNPIIYTIFSPEFRSAFQKILFGKY----------------------------
-------------------------------------------------------------
--------------------------------------------RRGH-R
+--------------------MANFTFGDLALDVARMGGLASTPSGLRS-----TGLTTPG
+LSPTGLVTSDFNDSYGLTGQFINGSHSSRSRD-----------NASANDT----------
+-SATNM---------TDDRYWSLTV-------------YSHEHLVLTSVILGLFVLCCII
+GNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEV
+CDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-
+SIPPLFG-WRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMI
+IYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKK
+RRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEA
+SIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAIITGA
+FLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAF
+QKIL-----FGKY-----------------------------------------------
+------------------------------------------------------------
+----------RR----------GH-R
> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
-M--------------------------------------------------------EGA
-EGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVV---PNTTWWQASAPFDTP------
---AALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACL
-V-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDY
-IHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV------
-----GYQIFATASSFYVPVLIILILYWRIYQTARKRIRR------RRGATARGGVGPPPV
-PAG---------------------------------GALVAGGGSGGIAAAVVAV-----
-IGRPLPTISETTTTGFTNVSSNNTSPE--KQSCANGLEADPPTTGYGAVAAAYYPSLVRR
-KPKEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-CD--CE-VSPVLTSLSLW
-LGYFNSTLNPVIYTVFSPEFRHAFQRLLCGRR----------------------------
-------------------------------------------------------------
-----------------------------------------VRRRRAP-Q
+--------------------M---------------------------------------
+----------------------EGAEGQEELDWEALYLRLPLQNCSWNSTGWEPN-----
+WNVTVV---------PNTTWWQASAPFDT--------PAALVRAAAKAVVLGLLILATVV
+GNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPEL
+CDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-
+CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV----------GYQIFATASSFYVPVLIILI
+LYWRIYQTARKRIRR------RRGATARGGVGPPPVPAG---------------------
+------------GALVAGGGSGGIAAAVVAV-----IGRPLPTISETTTTGFTNVSSNNT
+SPE--KQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLAIITGA
+FVACWLPFFVLAILVPT-CD--CE-VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAF
+QRLL-----CGRR-----------------------------------------------
+------------------------------------------------------------
+----------VRRRR-------AP-Q
> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-M-------------------NGTE--GDNFYVPFSNKTGLA--RSPYEYPQY-YL-----
+MN-------------------------GTE-------GDNFYVP----------------
+-------------------------FSNKTG------LARSPYEYPQY-YLAEPW-----
+-----------------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLN
+LAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIE
+RYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-WS-----RYIPEGMQCS
+CGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA-----
------------------------------------------------------------
---AEPWKYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLF
-G-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN
-F-RFGNTHAIMGVAFTWIMALAC-AAPPLV-GWS-----RYIPEGMQCSCGPDYYTLNPN
-FNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQ-------------------
------------------------------------------------------------
+---------------------------------STQKAEKEVTRMVVLMVIGFLVCWVPY
+ASVAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTL---
+--CCGKNPLGDDE-SG-ASTSKTEVSSVST------------------------------
------------------------------------------------------------
---ESASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT--HQGSD-FGATFMTLPAF
-FAKSSALYNPVIYILMNKQFRNCMITTL-----CCGKNPLGDDE--SGASTSKTEVSSVS
-T-----------------------------------------------------------
------------------------------------SPVSPA
+----SPVSPA
> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
-M-------------------NGTE--GPNFYVPFSNITGVV--RSPFEQPQY-YL-----
+MN-------------------------GTE-------GPNFYVP----------------
+-------------------------FSNITG------VVRSPFEQPQY-YLAEPW-----
+-----------------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLN
+LAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIE
+RYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLVG-WS-----RYIPEGMQCS
+CGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA-----
------------------------------------------------------------
---AEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFG
-G-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN
-F-RFGENHAIMGVAFTWVMALAC-AAPPLV-GWS-----RYIPEGMQCSCGIDYYTLKPE
-VNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQ-------------------
------------------------------------------------------------
+---------------------------------TTQKAEKEVTRMVIIMVIFFLICWLPY
+ASVAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSL---
+--CCGKNPLGDDE-AS-ATASKTET-----------------------------------
------------------------------------------------------------
---ESATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT--HQGSN-FGPIFMTLPAF
-FAKTASIYNPIIYIMMNKQFRNCMLTSL-----CCGKNPLGDDE--ASATASKTET----
-------------------------------------------------------------
------------------------------------SQVAPA
+----SQVAPA
> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
-M-------------------NGTE--GINFYVPMSNKTGVV--RSPFEYPQY-YL-----
+MN-------------------------GTE-------GINFYVP----------------
+-------------------------MSNKTG------VVRSPFEYPQY-YLAEPW-----
+-----------------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVN
+LAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIE
+RYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-WS-----RYMPEGMQCS
+CGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA-----
------------------------------------------------------------
---AEPWKYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACF
-G-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN
-F-RFSATHAMMGIAFTWVMAFSC-AAPPLF-GWS-----RYMPEGMQCSCGPDYYTHNPD
-YHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQ-------------------
------------------------------------------------------------
+---------------------------------TTQKAEKEVTRMVILMVLGFMLAWTPY
+AVVAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTI---
+--CCGKNPFGDEDVSSTVSQSKTEVSSVSS------------------------------
------------------------------------------------------------
---ESATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT--NKGAD-FTATLMAVPAF
-FSKSSSLYNPIIYVLMNKQFRNCMITTI-----CCGKNPFGDEDVSSTVSQSKTEVSSVS
-S-----------------------------------------------------------
------------------------------------SQVSPA
+----SQVSPA
> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-M-------------------NGTE--GKNFYVPMSNRTGLV--RSPFEYPQY-YL-----
-------------------------------------------------------------
---AEPWQFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCF
-G-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS
-F-KFSSSHAFAGIAFTWVMALAC-AAPPLF-GWS-----RYIPEGMQCSCGPDYYTLNPD
-YNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQ-------------------
+MN-------------------------GTE-------GKNFYVP----------------
+-------------------------MSNRTG------LVRSPFEYPQY-YLAEPW-----
+-----------------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVN
+LAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIE
+RYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-WS-----RYIPEGMQCS
+CGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA-----
------------------------------------------------------------
------------------------------------------------------------
---DSASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF--NKGAD-FSAKFMAIPAF
-FSKSSALYNPVIYVLLNKQFRNCMLTTI-----FCGKNPLGDDE-SSTVSTSKTEVSS--
+---------------------------------STQKAEREVTKMVILMVFGFLIAWTPY
+ATVAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTI---
+--FCGKNPLGDDE-SSTVSTSKTEVSS---------------------------------
------------------------------------------------------------
--------------------------------------VSPA
+------VSPA
> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-M-------------------NGTE--GNNFYVPLSNRTGLV--RSPFEYPQY-YL-----
-------------------------------------------------------------
---AEPWQFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCF
-G-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS
-F-KFSSTHASAGIAFTWVMAMAC-AAPPLV-GWS-----RYIPEGIQCSCGPDYYTLNPE
-YNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQ-------------------
+MN-------------------------GTE-------GNNFYVP----------------
+-------------------------LSNRTG------LVRSPFEYPQY-YLAEPW-----
+-----------------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVN
+LAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIE
+RYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-WS-----RYIPEGIQCS
+CGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA-----
------------------------------------------------------------
------------------------------------------------------------
---DSASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF--NKGAA-FSAQFMAIPAF
-FSKTSALYNPVIYVLLNKQFRSCMLTTL-----FCGKNPLGDEE-SSTVSTSKTEVSS--
+---------------------------------STQKAEREVTKMVILMVLGFLVAWTPY
+ATVAAWIFF---NKGA-AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTL---
+--FCGKNPLGDEE-SSTVSTSKTEVSS---------------------------------
------------------------------------------------------------
--------------------------------------VSPA
+------VSPA
> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
-M-------------------KQVPEFHEDFYIPIPLDINNLSAYSPFLVPQD-HL-----
+MK-------------------------QVPE-----FHEDFYIP----------------
+-------------------IPLD--INNLSAY--------SPFLVPQD-HLGNQG-----
+-----------------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVN
+LSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFE
+RWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLFG-WS-----RYIPEGLQCS
+CGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA-----
------------------------------------------------------------
---GNQGIFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIF
-G-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN
-F-TFKTPHAIAGCILPWISALAA-SLPPLF-GWS-----RYIPEGLQCSCGPDWYTTNNK
-YNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQA-------------------
------------------------------------------------------------
+---------------------------------STQKAEREVTKMVVVMVLGFLVCWAPY
+ASFSLWIVS---HRGE-EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMV---
+---CGKN-IEEDE-AS-TSSQVTQVSSVAP------------------------------
------------------------------------------------------------
---DSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS--HRGEE-FDLRMATIPSC
-LSKASTVYNPVIYVLMNKQFRSCMMKMV------CGKNIEEDEA---STSSQVTQVSSVA
-P-----------------------------------------------------------
----------------------------------------EK
+--------EK
> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-M-------------------RKMS--EEEFYLFKNISS-----VGPWDGPQY-HI-----
+MR-------------------------KMS-------EEEFYL-----------------
+-------------------------FKNISSV--------GPWDGPQY-HIAPVW-----
+-----------------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVN
+VSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFE
+RYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFFG-WS-----RFIPEGLQCS
+CGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA-----
------------------------------------------------------------
---APVWAFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIF
-S-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN
-F-RFSSKHALTVVLATWTIGIGV-SIPPFF-GWS-----RFIPEGLQCSCGPDWYTVGTK
-YRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQ-------------------
------------------------------------------------------------
+---------------------------------TTQKAEREVSRMVVVMVGSFCVCYVPY
+AAFAMYMVN---NRNH-GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMV---
+---CGKA-MTDES-DT-CSSQKTEVSTVSS------------------------------
------------------------------------------------------------
---ESATTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN--NRNHG-LDLRLVTIPSF
-FSKSACIYNPIIYCFMNKQFQACIMKMV------CGKAMTDESD---TCSSQKTEVSTVS
-S-----------------------------------------------------------
------------------------------------TQVGPN
+----TQVGPN
> 8=opsin, greensensitive human (fragment) S07060
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------DLAETVI
-A-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN
-V-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSY
-PGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQK-------------------
------------------------------------------------------------
+----DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWE
+RWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTS
+CGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE-----
------------------------------------------------------------
---ESESTQKAEKEVTRMVVVMVLAFC----------------------------------
------------------------------------------------------------
+---------------------------------STQKAEKEVTRMVVVMVLAFC------
------------------------------------------------------------
------------------------------------------
-> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-MAQQWSLQRLAGRHPQDSYEDSTQ--SSIFTYTNSNST-----RGPFEGPNY-HI-----
------------------------------------------------------------
---APRWVYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI
-A-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN
-V-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSY
-PGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQK-------------------
------------------------------------------------------------
+----------
+> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
+MA------QQWSLQRLAGRHPQDSYEDSTQ-------SSIFTYT----------------
+-------------------------NSNSTR---------GPFEGPNY-HIAPRW-----
+-----------------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVN
+LAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWE
+RWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-WS-----RYWPHGLKTS
+CGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE-----
------------------------------------------------------------
---ESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA--NPGYP-FHPLMAALPAF
-FAKSATIYNPVIYVFMNRQFRNCILQLF-------GKKVDDGSE-LS--SASKTEVSSV-
------------------------------------------------------------
------------------------------------SSVSPA
-> 10== Z68193 1 human Red Opsin <>[]
-MAQQWSLQRLAGRHPQDSYEDSTQ--SSIFTYTNSNST-----RGPFEGPNY-HI-----
+---------------------------------STQKAEKEVTRMVVVMVLAFCFCWGPY
+AFFACFAAA---NPGY-PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF---
+----GKKVDDGSE-LS--SASKTEVSSV--------------------------------
------------------------------------------------------------
---APRWVYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVI
-A-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN
-V-RFDAKLAIVGIAFSWIWSAVW-TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSY
-PGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQK-------------------
+----SSVSPA
+> 10== Z68193 1 human Red Opsin <>[]
+MA------QQWSLQRLAGRHPQDSYEDSTQ-------SSIFTYT----------------
+-------------------------NSNSTR---------GPFEGPNY-HIAPRW-----
+-----------------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVN
+LAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWE
+RWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-WS-----RYWPHGLKTS
+CGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE-----
------------------------------------------------------------
------------------------------------------------------------
---ESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA--NPGYA-FHPLMAALPAY
-FAKSATIYNPVIYVFMNRQFRNCILQLF-------GKKVDDGSE-LS--SASKTEVSSV-
+---------------------------------STQKAEKEVTRMVVVMIFAYCVCWGPY
+TFFACFAAA---NPGY-AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF---
+----GKKVDDGSE-LS--SASKTEVSSV--------------------------------
------------------------------------------------------------
------------------------------------SSVSPA
+----SSVSPA
> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
-MTEAWNVAVFAARRSRDD-DDTTR--GSVFTYTNTNNT-----RGPFEGPNY-HI-----
+MT------EAWNVAVFAARRSRDD-DDTTR-------GSVFTYT----------------
+-------------------------NTNNTR---------GPFEGPNY-HIAPRW-----
+-----------------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVN
+LAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWE
+RWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-WS-----RYWPHGLKTS
+CGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE-----
------------------------------------------------------------
---APRWVYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLV
-A-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN
-I-KFDSKLAIIGIVFSWVWAWGW-SAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSVE
-LGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQK-------------------
------------------------------------------------------------
+---------------------------------STQKAEREVSRMVVVMIVAFCICWGPY
+ASFVSFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF---
+----GKKVDDGSE-AS--TTSRTEVSSVSN------------------------------
------------------------------------------------------------
---ESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA--NPGYA-FHPLAAALPAY
-FAKSATIYNPVIYVFMNRQFRNCIMQLF-------GKKVDDGSE-AS--TTSRTEVSSVS
-N-----------------------------------------------------------
------------------------------------SSVAPA
+----SSVAPA
> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
-MA-AWE-AAFAARRRHEE-EDTTR--DSVFTYTNSNNT-----RGPFEGPNY-HI-----
+MA-------AWE-AAFAARRRHEE-EDTTR-------DSVFTYT----------------
+-------------------------NSNNTR---------GPFEGPNY-HIAPRW-----
+-----------------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVN
+LAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWE
+RWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-WS-----RYWPHGLKTS
+CGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE-----
------------------------------------------------------------
---APRWVYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVI
-A-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN
-I-KFDGKLAVAGILFSWLWSCAW-TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSD
-PGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQK-------------------
------------------------------------------------------------
+---------------------------------STQKAEKEVSRMVVVMIVAYCFCWGPY
+TFFACFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF---
+----GKKVDDGSE-VS---TSRTEVSSVSN------------------------------
------------------------------------------------------------
---ESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA--NPGYA-FHPLAAALPAY
-FAKSATIYNPIIYVFMNRQFRNCILQLF-------GKKVDDGSE-VS---TSRTEVSSVS
-N-----------------------------------------------------------
------------------------------------SSVSPA
+----SSVSPA
> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-M-------------------------SSNSSQAPPNGT-----PGPFDGPQWPYQ-----
+MS-------------------------SNS-------SQA--------------------
+-------------------------PPNGTP---------GPFDGPQWPYQAPQS-----
+-----------------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVN
+LAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALE
+RYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-WS-----SYVPEGLRTS
+CGPNWYTGGS--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD-----
------------------------------------------------------------
---APQSTYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLC
-G-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD
-F-QFQRRHAVSGCAFTWGWALLW-SAPPLL-GWS-----SYVPEGLRTSCGPNWYTGGS-
--NNNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQK-------------------
------------------------------------------------------------
+---------------------------------TTQRAEREVTRMVIVMVMAFLLCWLPY
+STFALVVAT---HKGI-IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEML---
+--CCGYQPQRTGKASPGTPGPHADVTAAGLRNKV--------------------------
------------------------------------------------------------
---EADTTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT--HKGII-IQPVLASLPSY
-FSKTATVYNPIIYVFMNKQFQSCLLEML-----CCGYQPQRTGKASPGTPGPHADVTAAG
-L-----------------------------------------------------------
--------------------------------RNKVMPAHPV
+----MPAHPV
> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-MESGNVSSSLFGNVSTALRPEARLSAE----------TRLLGWNVPPEELR---H-----
----------------------------------------IPEHWLTYPE-----------
---PPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK
---TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG
---KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYLTSCTFDYLTDNF-
--DTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVESLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-SNVDKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF--GDKTL-LTPGATMIPAC
-ACKMVACIDPFVYAISHPRYRMELQKRCPWL--ALNEKAPE-------SSAVASTSTTQE
-P-----------------------------------------------------------
------------------------------------QQTTAA
+MESGNVSS------------SLFGNVSTAL-------RPE--------------------
+--------ARLSAE-------------TRLL------GWNVPPEELR--HIPEHWLTYPE
+-------------PPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVIN
+LAFCDFMMM-VK-TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYD
+RFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPEGYLTS
+CTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNV
+ES----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS-------NVDKNKETAEIRIAKAAITICFLFFCSWTPY
+GVMSLIGAF---GDKT-LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWL
+--ALNEKAPESSA----VASTST---TQEP------------------------------
+------------------------------------------------------------
+----QQTTAA
> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-MEYHNVSSVL-GNVSSVLRPDARLSAE----------SRLLGWNVPPDELR---H-----
----------------------------------------IPEHWLIYPE-----------
---PPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK
---TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG
---KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYLTSCTFDYLTDNF-
--DTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNVDSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-SNVDKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF--GDKTL-LTPGATMIPAC
-TCKMVACIDPFVYAISHPRYRMELQKRCPWL--AISEKAPE-------SRAAISTSTTQE
-Q-----------------------------------------------------------
------------------------------------QQTTAA
+MEYHNVSS------------VL-GNVSSVL-------RPD--------------------
+--------ARLSAE-------------SRLL------GWNVPPDELR--HIPEHWLIYPE
+-------------PPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVIN
+LAFCDFMMM-IK-TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYD
+RYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPEGYLTS
+CTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKKMNV
+DS----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS-------NVDKSKEAAEIRIAKAAITICFLFFASWTPY
+GVMSLIGAF---GDKT-LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWL
+--AISEKAPESRA----AISTST---TQEQ------------------------------
+------------------------------------------------------------
+----QQTTAA
> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-ME------PLCNASEPPLRPEAR-SSGNGD-------LQFLGWNVPPDQIQ---Y-----
----------------------------------------IPEHWLTQLE-----------
---PPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK
---APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR
---NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDNF-
--DTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNVESLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-SNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF--GDKSL-LTQGATMIPAC
-TCKLVACIDPFVYAISHPRYRLELQKRCPWL--GVNEKSGE-------ISSAQSTTTQEQ
-------------------------------------------------------------
------------------------------------QQTTAA
+ME------------------PLCNASEPPL-------RPE--------------------
+--------AR-SSGNGD----------LQFL------GWNVPPDQIQ--YIPEHWLTQLE
+-------------PPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLN
+LAVFDLIMC-LK-APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYD
+RYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTS
+CSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKKMNV
+ES----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPY
+GVMSLIGAF---GDKS-LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWL
+--GVNEKSGEISS----AQSTTT---QEQ-------------------------------
+------------------------------------------------------------
+----QQTTAA
> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-MD------ALCNASEPPLRPEARMSSGSDE-------LQFLGWNVPPDQIQ---Y-----
----------------------------------------IPEHWLTQLE-----------
---PPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK
---APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR
---NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDNF-
--DTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNVESLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-SNVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF--GDKSL-LTPGATMIPAC
-TCKLVACIEPFVYAISHPRYRMELQKRCPWL--GVNEKSGE-------ASSAQSTTTQEQ
-T-----------------------------------------------------------
------------------------------------QQTSAA
+MD------------------ALCNASEPPL-------RPE--------------------
+--------ARMSSGSDE----------LQFL------GWNVPPDQIQ--YIPEHWLTQLE
+-------------PPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLN
+LAVFDLIMC-LK-APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYD
+RYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPEGYLTS
+CSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKKMNV
+ES----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS-------NVDKSKETAEIRIAKAAITICFLFFVSWTPY
+GVMSLIGAF---GDKS-LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWL
+--GVNEKSGEASS----AQSTTT---QEQT------------------------------
+------------------------------------------------------------
+----QQTSAA
> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
-MT-----------NATGPQMAYYGAASMDFGYPE---GVSIVDFVRPEIKP---Y-----
----------------------------------------VHQHWYNYPP-----------
---VNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTT
-N-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG
-P-KLTTGKAVVFALISWVIAIGC-ALPPFF-GWG-----NYILEGILDSCSYDYLTQDF-
--NTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNVSTLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-NEADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM--GDTSG-ITPLVSTLPAL
-LAKSCSCYNPFVYAISHPKYRLAITQHLPWF--CVHETETK-------SNDDSQSNSTVA
-Q-----------------------------------------------------------
---------------------------------------DKA
+MT-------------------------NAT-------GPQMAYYG---------------
+-------AASMDFGYPE----------GVSI------VDFVRPEIKP--YVHQHWYNYPP
+-------------VNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVN
+LALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFD
+RYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFFG-WG-----NYILEGILDS
+CSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKKMNV
+ST----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS--------NEADAQRAEIRIAKTALVNVSLWFICWTPY
+ALISLKGVM---GDTS-GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWF
+--CVHETETKSND----DSQSNS---TVAQ------------------------------
+------------------------------------------------------------
+-------DKA
> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
-MA-----------NVTGPQMAFYGSGAATFGYPE---GMTVADFVPDRVKH---M-----
----------------------------------------VLDHWYNYPP-----------
---VNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTT
-N-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG
-P-KLTQGKATFMCGLAWVISVGW-SLPPFF-GWG-----SYTLEGILDSCSYDYFTRDM-
--NTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNVTNLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-NEAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL--GNAEG-ITPLLTTLPAL
-LAKSCSCYNPFVYAISHPKFRLAITQHLPWF--CVHEKDPN-------DVEENQSSNTQT
-Q-----------------------------------------------------------
---------------------------------------EKS
+MA-------------------------NVT-------GPQMAFYG---------------
+-------SGAATFGYPE----------GMTV------ADFVPDRVKH--MVLDHWYNYPP
+-------------VNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVN
+LALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFD
+RYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFFG-WG-----SYTLEGILDS
+CSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKKMNV
+TN----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS--------NEAETQRAEIRIAKTALVNVSLWFICWTPY
+AAITIQGLL---GNAE-GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWF
+--CVHEKDPNDVE----ENQSSN---TQTQ------------------------------
+------------------------------------------------------------
+-------EKS
> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
-ME-----SFAVAAAQLGPHFA-----PLS--------NGSVVDKVTPDMAH---L-----
----------------------------------------ISPYWNQFPA-----------
---MDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
-N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
-R-PMTIPLALGKM---------------------------YVPEGNLTSCGIDYLERDW-
--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGAC
-FAKSAACYNPIVYGISHPKYRLALKEKCPCC--VFGKVDDGKSS----DAQSQA-TASEA
-E-----------------------------------------------------------
---------------------------------------SKA
+MESFAV-------------------AAAQL-------GPHF-------------------
+--------APLS---------------NGSV------VDKVTPDMAH--LISPYWNQFPA
+-------------MDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVIN
+LAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLD
+RYQVIVKGMAGR-PMTIPLALGKM---------------------------YVPEGNLTS
+CGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNV
+KS----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPY
+LVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC
+--VFGKVDDGKSS----DAQSQA-TASEAE------------------------------
+------------------------------------------------------------
+-------SKA
> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
-ME-----SFAVAAAQLGPHFA-----PLS--------NGSVVDKVTPDMAH---L-----
----------------------------------------ISPYWNQFPA-----------
---MDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
-N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
-R-PMTIPLALGKIAYIWFMSSIW-CLAPAF-GWS-----RYVPEGNLTSCGIDYLERDW-
--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGAC
-FAKSAACYNPIVYGISHPKYRLALKEKCPCC--VFGKVDDGKSS----DAQSQA-TASEA
-E-----------------------------------------------------------
---------------------------------------SKA
+MESFAV-------------------AAAQL-------GPHF-------------------
+--------APLS---------------NGSV------VDKVTPDMAH--LISPYWNQFPA
+-------------MDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVIN
+LAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLD
+RYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-WS-----RYVPEGNLTS
+CGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNV
+KS----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS--------SEDAEKSAEGKLAKVALVTITLWFMAWTPY
+LVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC
+--VFGKVDDGKSS----DAQSQA-TASEAE------------------------------
+------------------------------------------------------------
+-------SKA
> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
-MD-----SFAAVATQLGPQFA-----APS--------NGSVVDKVTPDMAH---L-----
----------------------------------------ISPYWDQFPA-----------
---MDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMIT
-N-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG
-R-PMTIPLALGKIAYIWFMSTIWCCLAPVF-GWS-----RYVPEGNLTSCGIDYLERDW-
--NPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGAC
-FAKSAACYNPIVYGISHPKYRLALKEKCPCC--VFGKVDDGKSS----EAQSQA-TTSEA
-E-----------------------------------------------------------
---------------------------------------SKA
+MDSFAA-------------------VATQL-------GPQF-------------------
+--------AAPS---------------NGSV------VDKVTPDMAH--LISPYWDQFPA
+-------------MDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVIN
+LAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLD
+RYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-WS-----RYVPEGNLTS
+CGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKKMNV
+KS----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS--------SEDADKSAEGKLAKVALVTISLWFMAWTPY
+LVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCC
+--VFGKVDDGKSS----EAQSQA-TTSEAE------------------------------
+------------------------------------------------------------
+-------SKA
> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-MERSHLPETPFDLAHSGPRFQ-----AQSSG------NGSVLDNVLPDMAH---L-----
----------------------------------------VNPYWSRFAP-----------
---MDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS
-Q-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING
-T-PMTIKTSIMKILFIWMMAVFW-TVMPLI-GWS-----AYVPEGNLTACSIDYMTRMW-
--NPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF--KI-DG-LTPLTTIWGAT
-FAKTSAVYNPIVYGISHPKYRIVLKEKCPMC--VFGNTDEPKPD----APASDTETTSEA
-D-----------------------------------------------------------
---------------------------------------SKA
+MERSHLPETPF--------------DLAHS-------GPRF-------------------
+--------QAQSSG-------------NGSV------LDNVLPDMAH--LVNPYWSRFAP
+-------------MDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLN
+LAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFD
+RYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-WS-----AYVPEGNLTA
+CSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKKMNV
+KS----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS--------SEDCDKSAEGKLAKVALTTISLWFMAWTPY
+LVICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMC
+--VFGNTDEPKPD----APASDTETTSEAD------------------------------
+------------------------------------------------------------
+-------SKA
> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-MERSLLPEPPLAMALLGPRFE-----AQTGG------NRSVLDNVLPDMAP---L-----
----------------------------------------VNPHWSRFAP-----------
---MDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMAS
-Q-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING
-T-PMTIKTSIMKIAFIWMMAVFW-TIMPLI-GWS-----SYVPEGNLTACSIDYMTRQW-
--NPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNVKSLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SEDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF--KI-DG-LTPLTTIWGAT
-FAKTSAVYNPIVYGISHPNDRLVLKEKCPMC--VCGTTDEPKPD----APPSDTETTSEA
-E-----------------------------------------------------------
---------------------------------------SKD
+MERSLLPEPPL--------------AMALL-------GPRF-------------------
+--------EAQTGG-------------NRSV------LDNVLPDMAP--LVNPHWSRFAP
+-------------MDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLN
+LAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFD
+RYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-WS-----SYVPEGNLTA
+CSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKKMNV
+KS----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS--------SEDCDKSAENKLAKVALTTISLWFMAWTPY
+LIICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMC
+--VCGTTDEPKPD----APPSDTETTSEAE------------------------------
+------------------------------------------------------------
+-------SKD
> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-MI-----------AVSGPSYE-----AFSYGGQARFNNQTVVDKVPPDMLH---L-----
----------------------------------------IDANWYQYPP-----------
---LNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFC
-M-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG
-K-PLSINGALIRIIAIWLFSLGW-TIAPMF-GWN-----RYVPEGNMTACGTDYFNRGL-
--LSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNVASLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-S-SENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF--NL-VK-ISPLFTIWGSL
-FAKANAVYNPIVYGISHPKYRAALFAKFPSL--ACAA--EPSSD----AVSTTSGTTTVT
-D-----------------------------------------------------------
------------------------------------NEKSNA
+MI-------------------------AVS-------GPSY-------------------
+--------EAFSYGGQAR-------FNNQTV------VDKVPPDMLH--LIDANWYQYPP
+-------------LNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVIN
+LAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFD
+RYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMFG-WN-----RYVPEGNMTA
+CGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKKMNV
+AS----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRS--------SENQNTSAECKLAKVALMTISLWFMAWTPY
+LVINFSGIF---NL-V-KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSL
+--ACAA--EPSSD----AVSTTSGTTTVTD------------------------------
+------------------------------------------------------------
+----NEKSNA
> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-MA------------------NQLSYSSLGWPYQP---NASVVDTMPKEMLY---M-----
----------------------------------------IHEHWYAFPP-----------
---MNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAF
-M-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA
-A-PLTHKKATLLLLFVWIWSGGW-TILPFF-GWS-----RYVPEGNLTSCTVDYLTKDW-
--SSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNVASLR-------
-------------------------------------------------------------
-------------------------------------------------------------
-ANADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF--SSGTR-LTPLATIWGSV
-FAKANSCYNPIVYGISHPRYKAALYQRFPSL--ACGSGESGSDV----KSEASATTTMEE
-K-----------------------------------------------------------
------------------------------------PKIPEA
+MAN----------------------------------QLSY-------------------
+--------SSLGWPYQP----------NASV------VDTMPKEMLY--MIHEHWYAFPP
+-------------MNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVN
+LAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLD
+RYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFFG-WS-----RYVPEGNLTS
+CTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKKMNV
+AS----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRA-------NADQQKQSAECRLAKVAMMTVGLWFMAWTPY
+LIISWAGVF---SSGT-RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSL
+--ACGSGESGSDV----KSEASA-TTTMEEK-----------------------------
+------------------------------------------------------------
+----PKIPEA
> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
-MV------------------ESTTLVNQTWWYNPTVD-----------------------
----------------------------------------IHPHWAKFDP-----------
---IPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAI
-NGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAA
-SKKMSHRRAFLMIIFVWMWSIVW-SVGPVF-NWG-----AYVPEGILTSCSFDYLSTDP-
--STRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNAKELR-------
-------------------------------------------------------------
-------------------------------------------------------------
---KAQAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF--GPAEW-VTPYAAELPVL
-FAKASAIHNPIVYSVSHPKFREAIQTTFPWLLTCCQFDEKECED----ANDAEEEVVASE
-R--GGESRDAAQMKEMMAMMQKMQAQQAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYP
-PQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
+MV-------------------------ESTT-----L-----------------------
+--------VNQTWWY------------NPTV------------------DIHPHWAKFDP
+-------------IPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIIN
+LAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISID
+RYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-WG-----AYVPEGILTS
+CSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKRLNA
+KE----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRK---------AQAGASAEMKLAKISMVIITQFMLSWSPY
+AIIALLAQF---GPAE-WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWL
+LTCCQFDEKECED----ANDAEEEVVASER--GGESRDAAQMKEMMAMMQKMQAQQAAYQ
+P---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPP
+QGVDNQAYQA
> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
-MG------------------RDLR-DNETWWYNPSIV-----------------------
----------------------------------------VHPHWREFDQ-----------
---VPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLV
-NGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAA
-SKKMSHRRAFIMIIFVWLWSVLW-AIGPIF-GWG-----AYTLEGVLCNCSFDYISRDS-
--TTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNAKELR-------
-------------------------------------------------------------
-------------------------------------------------------------
---KAQAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF--GPLEW-VTPYAAQLPVM
-FAKASAIHNPMIYSVSHPKFREAISQTFPWVLTCCQFDDKETED----DKDAETEIPAGE
-SSDAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYP
-PQGYPP---PPQGAPPQGAPP------AAPPQGVDNQAYQA
+MG-------------------------RDLR-----------------------------
+--------DNETWWY------------NPSI------------------VVHPHWREFDQ
+-------------VPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIIN
+LAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISID
+RYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-WG-----AYTLEGVLCN
+CSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKRLNA
+KE----------------------------------------------------------
+------------------------------------------------------------
+-------------------LRK---------AQAGANAEMRLAKISIVIVSQFLLSWSPY
+AVVALLAQF---GPLE-WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWV
+LTCCQFDDKETED----DKDAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAYP
+PQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP------AAPP
+QGVDNQAYQA
> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
-MMD-----------------VNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDGGADPVAGS
-WAPHLLS------------------------EVTAS---PAPTW---DAPPDNASGCGEQ
-INYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVA
-V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
-PVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVNDDKVCLISQDF------
-----GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------PGFP
-RVEPDSVIALNG------------------------------------------------
-----IVKLQKE------------------VEECAN------------------LSRLLKH
-ERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTFLW
-LGYANSLINPFIYAFFNRDLRTTYRSLL-----QCQYRNINRKLSAAGMHEALKLAERPE
-RPEFV-------------------------------------------------------
----------------------------LQNADYCRKKGHDS
+MMDVNSS----------GRPDLYGHLRSFLLPEVGRGLPDLSPDG------------GAD
+PVAGSWAPHLLS------------------------EVTASP---------APTW---DA
+PPDNASGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVS
+LALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISID
+RYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCL
+ISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG-----
+---------FPRVEPDSVIALNG-------------------------------------
+-------------------------IVKLQKE-------------------VEECAN---
+---------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWLPF
+FLLSTARPFICGTSCS-CIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLL---
+--QCQYRNINRKL----SAAGMHEALKLAER--------------------PERPEFVLQ
+NADY--------------------------------------------------------
+--CRKKGHDS
> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
M-----------------------------------------------------------
---PHLLS---------------------GFLEVTAS---PAPTW---DAPPDNVSGCGEQ
-INYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA
-V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
-PVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVNDDKVCLISQDF------
-----GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------PGFP
-RVQPESVISLNG------------------------------------------------
-----VVKLQKE------------------VEECAN------------------LSRLLKH
-ERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLW
-LGYANSLINPFIYAFFNRDLRPTSRSLL-----QCQYRNINRKLSAAGMHEALKLAERPE
-RSEFV-------------------------------------------------------
----------------------------LQNSDHCGKKGHDT
+-------PHLLS---------------------GFLEVTASP---------APTW---DA
+PPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVS
+LALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISID
+RYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCL
+ISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG-----
+---------FPRVQPESVISLNG-------------------------------------
+-------------------------VVKLQKE-------------------VEECAN---
+---------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWLPF
+FLLSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLL---
+--QCQYRNINRKL----SAAGMHEALKLAER--------------------PERSEFVLQ
+NSDH--------------------------------------------------------
+--CGKKGHDT
> 31=p A47425 serotonin receptor 5HT-7 - rat
M-----------------------------------------------------------
---PHLLS---------------------GFLEVTAS---PAPTW---DAPPDNVSGCGEQ
-INYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVA
-V-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTY
-PVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVNDDKVCLISQDF------
-----GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF--------------PGFP
-RVQPESVISLNG------------------------------------------------
-----VVKLQKE------------------VEECAN------------------LSRLLKH
-ERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLW
-LGYANSLINPFIYAFFNRDLRTTYRSLL-----QCQYRNINRKLSAAGMHEALKLAERPE
-RSEFV-------------------------------------------------------
----------------------------LQNSDHCGKKGHDT
+-------PHLLS---------------------GFLEVTASP---------APTW---DA
+PPDNVSGCGEQINYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVS
+LALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISID
+RYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-WA-----QNVNDDKVCL
+ISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG-----
+---------FPRVQPESVISLNG-------------------------------------
+-------------------------VVKLQKE-------------------VEECAN---
+---------------LSRLLKH------ERKNISIFKREQKAATTLGIIVGAFTVCWLPF
+FLLSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLL---
+--QCQYRNINRKL----SAAGMHEALKLAER--------------------PERSEFVLQ
+NSDH--------------------------------------------------------
+--CGKKGHDT
> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-M-----------------------------------------------------------
---------------------DVLSPGQGNNTTSPPAPFETGGNTTGISD-----------
---VTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL
-V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY
-VNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP---EDRSDPDACTISKDH------
-----GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGADTRHGASPAPQ
-PKK---------SVNGESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGN
-SKEHLPLPSEAGPTPCAPASFERKNERNA-------------------------------
-EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCH-MPTLLGAIINW
-LGYSNSLLNPVIYAYFNKDFQNAFKKII-----KCKF-----------------------
-------------------------------------------------------------
---------------------------------------CRQ
+MDVLSPG-----------------------------------------------------
+-------------------------QGNNTT------SPPAPF------ETGGNTTGISD
+-------------VTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGS
+LAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALD
+RYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACT
+ISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKVEKTGA
+DTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALCA--------------
+-NGAVRQGD---------------------------------DGAALEVIEVHRVGNSKE
+HLPLPSEAG--PTPCAPASFERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPF
+FIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII---
+--KCKF------------------------------------------------------
+------------------------------------------------------------
+--CR-----Q
> 33=p A35181 serotonin receptor class 1A - rat
-M-----------------------------------------------------------
---------------------DVFSFGQGNNTTASQEPFGTGGNVTSISD-----------
---VTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVL
-V-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDY
-VNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP---EDRSDPDACTISKDH------
-----GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSSAPP
-PKK---------SLNGQPGSGDWRRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGN
-SKEHLPLPSESGSNSYAPACLERKNERNA-------------------------------
-EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCH-MPALLGAIINW
-LGYSNSLLNPVIYAYFNKDFQNAFKKII-----KCKF-----------------------
-------------------------------------------------------------
---------------------------------------CRR
+MDVFSFG-----------------------------------------------------
+-------------------------QGNNTT------ASQEPF------GTGGNVTSISD
+-------------VTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGS
+LAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALD
+RYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACT
+ISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKVEKKGA
+GTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPCT--------------
+-NGAVRQGD---------------------------------DEATLEVIEVHRVGNSKE
+HLPLPSESG--SNSYAPACLERKNERNAEAKRKMALARERKTVKTLGIIMGTFILCWLPF
+FIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKII---
+--KCKF------------------------------------------------------
+------------------------------------------------------------
+--CR-----R
> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRD-----------------NASANDTSATNM---TDDRYWSLTV-----------
---YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL
-V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY
-IRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NNDPDKTGTCIISQDK------
-----GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASP
-KTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSS
-SSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS-------------
-RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF--VDPEG-IPPFARSFVLW
-LGYFNSLLNPIIYTIFSPEFRSAFQKIL-----FGKY-----------------------
-------------------------------------------------------------
-------------------------------------RRGHR
+MANFTFGDLALDVARMGGLASTPSGLRSTGL-----TTPGLSPTGLVTSDFNDSYGLTGQ
+FINGSHSSRSRD-----------NASANDTS------ATNMT---------DDRYWSLTV
+-------------YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILS
+LAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMD
+RYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCI
+ISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARL
+KTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLP
+ENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK
+-------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPF
+FIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL---
+--FGKY------------------------------------------------------
+------------------------------------------------------------
+-----RRGHR
> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRD-----------------NASANDTSATNM---TDDRYWSLTV-----------
---YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVL
-V-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDY
-IRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NNDPDKTGTCIISQDK------
-----GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASP
-KTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSS
-SSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS-------------
-RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF--VDPEG-IPPFARSFVLW
-LGYFNSLLNPIIYTIFSPEFRSAFQKIL-----FGKY-----------------------
-------------------------------------------------------------
-------------------------------------RRGHR
+MANFTFGDLALDVARMGGLASTPSGLRSTGL-----TTPGLSPTGLVTSDFNDSYGLTGQ
+FINGSHSSRSRD-----------NASANDTS------ATNMT---------DDRYWSLTV
+-------------YSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILS
+LAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMD
+RYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCI
+ISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARL
+KTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNRAKKLP
+ENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQCNNGKK
+-------------------ISSNDTPYSRTREKLELKRERKAARTLAIITGAFLICWLPF
+FIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKIL---
+--FGKY------------------------------------------------------
+------------------------------------------------------------
+-----RRGHR
> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
-M--------------------------------------------------------EGA
-EGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVV---PNTTWWQASAPFDTP------
---AALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACL
-V-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDY
-IHASTAKRVGMMIACVWTVSFFV-CIAQLL-GWKDPDWNQRVSEDLRCVVSQDV------
-----GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARGGV--------GPP
-PVPAGGALVAGGGSGG------------------------------------IAAAVVAV
-IGRPLPTISETTTTGFTNVSSNNTSPE--KQSCANGLEADPPTTGYGAVAAAYYPSLVRR
-KPKEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT---CDCE-VSPVLTSLSLW
-LGYFNSTLNPVIYTVFSPEFRHAFQRLL-----CGRR-----------------------
-------------------------------------------------------------
----------------------------------VRRRRAPQ
+ME----------------------------------------------------------
+---GAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTVVP---------NTTWWQASA
+PFDTP--------AALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILS
+LAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALD
+RYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCV
+VSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGATARGGV
+GP-------PP-----------------------------------------------VP
+AGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSCANGLE
+ADPPTTGYGAVAAAYYPSLVRR------KPKEAADSKRERKAAKTLAIITGAFVACWLPF
+FVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLL---
+--CGRR------------------------------------------------------
+------------------------------------------------------------
+--VRRRRAPQ
--- /dev/null
+> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
+M-----------------------------------------------------------
+---NGTEGDNFYVPFSNKTGLAR-------------------------------SP----
+-------------YEY--------------------------------------------
+------------------------------------------------------------
+------PQY-------------------YLAEPW---------------------KYSAL
+AAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSM
+N-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGNFRF-GNTHAIM
+GVAFTWIMALAC-AAPPLV-GWSR-------YIPEGMQCSCGPDYYTLNPNFNNESYVVY
+MFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQ----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESASTQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-HQGS-D-FGA
+TFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC----------------------
+-------------------GKNP-LGDDE-SG-A--------------------------
+------------------------------------------------------------
+--------------------------------------------------------STSK
+TEVSSVSTSPVSPA
+> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
+M-----------------------------------------------------------
+---NGTEGPNFYVPFSNITGVVR-------------------------------SP----
+-------------FEQ--------------------------------------------
+------------------------------------------------------------
+------PQY-------------------YLAEPW---------------------QFSML
+AAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSL
+H-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSNFRF-GENHAIM
+GVAFTWVMALAC-AAPPLV-GWSR-------YIPEGMQCSCGIDYYTLKPEVNNESFVIY
+MFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQ----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESATTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-HQGS-N-FGP
+IFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC----------------------
+-------------------GKNP-LGDDE-AS-A--------------------------
+------------------------------------------------------------
+--------------------------------------------------------TASK
+TE-----TSQVAPA
+> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
+M-----------------------------------------------------------
+---NGTEGINFYVPMSNKTGVVR-------------------------------SP----
+-------------FEY--------------------------------------------
+------------------------------------------------------------
+------PQY-------------------YLAEPW---------------------KYRLV
+CCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAW
+N-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGNFRF-SATHAMM
+GIAFTWVMAFSC-AAPPLF-GWSR-------YMPEGMQCSCGPDYYTHNPDYHNESYVLY
+MFVIHFIIPVVVIFFSYGRLICKVREAAAQQQ----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESATTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-NKGA-D-FTA
+TLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC----------------------
+-------------------GKNP-FGDEDVSSTV--------------------------
+------------------------------------------------------------
+--------------------------------------------------------SQSK
+TEVSSVSSSQVSPA
+> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
+M-----------------------------------------------------------
+---NGTEGKNFYVPMSNRTGLVR-------------------------------SP----
+-------------FEY--------------------------------------------
+------------------------------------------------------------
+------PQY-------------------YLAEPW---------------------QFKIL
+ALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAI
+N-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSFKF-SSSHAFA
+GIAFTWVMALAC-AAPPLF-GWSR-------YIPEGMQCSCGPDYYTLNPDYNNESYVIY
+MFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQ----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------DSASTQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF-NKGA-D-FSA
+KFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC----------------------
+-------------------GKNP-LGDDE-SSTV--------------------------
+------------------------------------------------------------
+--------------------------------------------------------STSK
+TEVSS-----VSPA
+> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
+M-----------------------------------------------------------
+---NGTEGNNFYVPLSNRTGLVR-------------------------------SP----
+-------------FEY--------------------------------------------
+------------------------------------------------------------
+------PQY-------------------YLAEPW---------------------QFKLL
+AVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAI
+N-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSFKF-SSTHASA
+GIAFTWVMAMAC-AAPPLV-GWSR-------YIPEGIQCSCGPDYYTLNPEYNNESYVLY
+MFICHFILPVTIIFFTYGRLVCTVKAAAAQQQ----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------DSASTQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF-NKGA-A-FSA
+QFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC----------------------
+-------------------GKNP-LGDEE-SSTV--------------------------
+------------------------------------------------------------
+--------------------------------------------------------STSK
+TEVSS-----VSPA
+> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
+MKQ---------------------------------------------------------
+------------------------VPEFH---EDFYIP-----IPLDINNLSAYSP----
+-------------FLV--------------------------------------------
+------------------------------------------------------------
+------PQD-------------------HLGNQG---------------------IFMAM
+SVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFF
+N-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGNFTF-KTPHAIA
+GCILPWISALAA-SLPPLF-GWSR-------YIPEGLQCSCGPDWYTTNNKYNNESYVMF
+LFCFCFAVPFGTIVFCYGQLLITLKLAAKAQA----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------DSASTQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-HRGE-E-FDL
+RMATIPSCLSKASTVYNPVIYVLMNKQFRSCMMKMVC-----------------------
+-------------------GKN--IEEDE-AS-T--------------------------
+------------------------------------------------------------
+--------------------------------------------------------SSQV
+TQVSSVAPE----K
+> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
+M-----------------------------------------------------------
+----------------------RKMSE-----EEFYL----------FKNISSVGP----
+-------------WDG--------------------------------------------
+------------------------------------------------------------
+------PQY-------------------HIAPVW---------------------AFYLQ
+AAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASC
+N-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGNFRF-SSKHALT
+VVLATWTIGIGV-SIPPFF-GWSR-------FIPEGLQCSCGPDWYTVGTKYRSESYTWF
+LFIFCFIVPLSLICFSYTQLLRALKAVAAQQQ----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESATTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-NRNH-G-LDL
+RLVTIPSFFSKSACIYNPIIYCFMNKQFQACIMKMVC-----------------------
+-------------------GKA--MTDES-DT-C--------------------------
+------------------------------------------------------------
+--------------------------------------------------------SSQK
+TEVSTVSSTQVGPN
+> 8=opsin, greensensitive human (fragment) S07060
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------DLAETVIA-STISIVNQV
+S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRF-DAKLAIV
+GIAFSWIWAAVW-TAPPIF-GWSR-------YWPHGLKTSCGPDVFSGSSYPGVQSYMIV
+LMVTCCITPLSIIVLCYLQVWLAIRAVAKQQK----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESESTQKAEKEVTRMVVVMVLAFC--------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+--------------
+> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
+MAQ------QWSLQRLAGRHPQDSYEDSTQS-------------------SIFTYTNS--
+---NST----------------R-------------------------------GP----
+-------------FEG--------------------------------------------
+------------------------------------------------------------
+------PNY-------------------HIAPRW---------------------VYHLT
+SVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQV
+Y-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNVRF-DAKLAIV
+GIAFSWIWAAVW-TAPPIF-GWSR-------YWPHGLKTSCGPDVFSGSSYPGVQSYMIV
+LMVTCCITPLSIIVLCYLQVWLAIRAVAKQQK----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESESTQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-NPGY-P-FHP
+LMAALPAFFAKSATIYNPVIYVFMNRQFRNCILQLF------------------------
+-------------------GKK--VDDGS-EL-S--------------------------
+------------------------------------------------------------
+--------------------------------------------------------SASK
+TEVSSV--SSVSPA
+> 10== Z68193 1 human Red Opsin <>[]
+MAQ------QWSLQRLAGRHPQDSYEDSTQS-------------------SIFTYTNS--
+---NST----------------R-------------------------------GP----
+-------------FEG--------------------------------------------
+------------------------------------------------------------
+------PNY-------------------HIAPRW---------------------VYHLT
+SVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQV
+S-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNVRF-DAKLAIV
+GIAFSWIWSAVW-TAPPIF-GWSR-------YWPHGLKTSCGPDVFSGSSYPGVQSYMIV
+LMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQK----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESESTQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-NPGY-A-FHP
+LMAALPAYFAKSATIYNPVIYVFMNRQFRNCILQLF------------------------
+-------------------GKK--VDDGS-EL-S--------------------------
+------------------------------------------------------------
+--------------------------------------------------------SASK
+TEVSSV--SSVSPA
+> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
+MTE------AWNV--------------------AVFAARRSRDDDDTTRGSVFTYTNT--
+---NNT----------------R-------------------------------GP----
+-------------FEG--------------------------------------------
+------------------------------------------------------------
+------PNY-------------------HIAPRW---------------------VYNLV
+SFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQI
+F-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNIKF-DSKLAII
+GIVFSWVWAWGW-SAPPIF-GWSR-------YWPHGLKTSCGPDVFSGSVELGCQSFMLT
+LMITCCFLPLFIIIVCYLQVWMAIRAVAAQQK----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESESTQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA-NPGY-A-FHP
+LAAALPAYFAKSATIYNPVIYVFMNRQFRNCIMQLF------------------------
+-------------------GKK--VDDGS-EA-S--------------------------
+------------------------------------------------------------
+--------------------------------------------------------TTSR
+TEVSSVSNSSVAPA
+> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
+MA----------------------------AWEAAFAARRRHEEEDTTRDSVFTYTNS--
+---NNT----------------R-------------------------------GP----
+-------------FEG--------------------------------------------
+------------------------------------------------------------
+------PNY-------------------HIAPRW---------------------VYNLT
+SVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQI
+S-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNIKF-DGKLAVA
+GILFSWLWSCAW-TAPPIF-GWSR-------YWPHGLKTSCGPDVFSGSSDPGVQSYMVV
+LMVTCCFFPLAIIILCYLQVWLAIRAVAAQQK----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ESESTQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-NPGY-A-FHP
+LAAALPAYFAKSATIYNPIIYVFMNRQFRNCILQLF------------------------
+-------------------GKK--VDDGS-EV-S--------------------------
+------------------------------------------------------------
+--------------------------------------------------------T-SR
+TEVSSVSNSSVSPA
+> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
+MS-----------------------------------------------------SNSSQ
+APPNGTP-----------------------------------------------GP----
+-------------FDG--------------------------------------------
+------------------------------------------------------------
+------PQW------------------PYQAPQS---------------------TYVGV
+AVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNI
+N-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGDFQF-QRRHAVS
+GCAFTWGWALLW-SAPPLL-GWSS-------YVPEGLRTSCGPNWYTGGS--NNNSYILS
+LFVTCFVLPLSLILFSYTNLLLTLRAAAAQQK----------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------EADTTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-HKGI-I-IQP
+VLASLPSYFSKTATVYNPIIYVFMNKQFQSCLLEMLCC----------------------
+-------------------GYQ------------P-----------------------QR
+TGKASPGTPGPHADVTAAGLRNKVM-----------------------------------
+-------------------------------------------------------P----
+----------AHPV
+> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
+ME----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------------------------SGNVSSSLFGNVSTALR-PEARLSAETRL
+LGW-----------------NVPPEELRHIPEHWLTYPEPP------------ESMNYLL
+GTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSF
+H-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG-KM-THGKAIA
+MIIFIYMYATPW-VVACYTETWGR-------FVPEGYLTSCTFDYLTDN--FDTRLFVAC
+IFFFSFVCPTTMITYYYSQIVGHVFSHEKALR----------------------------
+------------DQAKKMNVESLRSN----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------VDKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-GDKT-L-LTP
+GATMIPACACKMVACIDPFVYAISHPRYRMELQKRCPWLALNE-----------------
+-----------------------------------------------------------K
+APES--------------------------------------------------------
+--------------------------------------------------------SAVA
+STSTTQEPQQTTAA
+> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
+ME----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------YHNVSSVLGNVSSV---------------LR-PDARLSAESRL
+LGW-----------------NVPPDELRHIPEHWLIYPEPP------------ESMNYLL
+GTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSF
+H-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG-KM-THGKAIA
+MIIFIYLYATPW-VVACYTESWGR-------FVPEGYLTSCTFDYLTDN--FDTRLFVAC
+IFFFSFVCPTTMITYYYSQIVGHVFSHEKALR----------------------------
+------------DQAKKMNVDSLRSN----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------VDKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF-GDKT-L-LTP
+GATMIPACTCKMVACIDPFVYAISHPRYRMELQKRCPWLAISE-----------------
+-----------------------------------------------------------K
+APES--------------------------------------------------------
+--------------------------------------------------------RAAI
+STSTTQEQQQTTAA
+> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
+MEPL--------------------------------------------------------
+--------------------------------------------------CNASEP----
+----------------------------------------------PLRPEAR-SSG---
+------------NGDLQF------------------------------------------
+LGW-----------------NVPPDQIQYIPEHWLTQLEPP------------ASMHYML
+GVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSF
+H-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR-NM-TFTKAVI
+MNIIIWLYCTPW-VVLPLTQFWDR-------FVPEGYLTSCSFDYLSDN--FDTRLFVGT
+IFFFSFVCPTLMILYYYSQIVGHVFSHEKALR----------------------------
+------------EQAKKMNVESLRSN----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------VDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDKS-L-LTQ
+GATMIPACTCKLVACIDPFVYAISHPRYRLELQKRCPWLGVNE-----------------
+-----------------------------------------------------------K
+SGEI--------------------------------------------------------
+--------------------------------------------------------SSAQ
+STTTQEQ-QQTTAA
+> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
+MDAL--------------------------------------------------------
+--------------------------------------------------CNASEP----
+----------------------------------------------PLRPEARMSSG---
+------------SDELQF------------------------------------------
+LGW-----------------NVPPDQIQYIPEHWLTQLEPP------------ASMHYML
+GVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSF
+H-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR-NM-TFTKAVI
+MNIIIWLYCTPW-VVLPLTQFWDR-------FVPEGYLTSCSFDYLSDN--FDTRLFVGT
+IFLFSFVVPTLMILYYYSQIVGHVFNHEKALR----------------------------
+------------EQAKKMNVESLRSN----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------VDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-GDKS-L-LTP
+GATMIPACTCKLVACIEPFVYAISHPRYRMELQKRCPWLGVNE-----------------
+-----------------------------------------------------------K
+SGEA--------------------------------------------------------
+--------------------------------------------------------SSAQ
+STTTQEQTQQTSAA
+> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
+MT----------------------------------------------------------
+---------------------------------------------------NATGP----
+------------------------------------------------------------
+--------------QMAYYGAAS-------------------------------------
+MDFGY-P------EGVSIVDFVRPEIKPYVHQHWYNYPPVN------------PMWHYLL
+GVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCF
+SGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGPKL-TTGKAVV
+FALISWVIAIGC-ALPPFF-GWGN-------YILEGILDSCSYDYLTQD--FNTFSYNIF
+IFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMR----------------------------
+------------AQAKKMNVSTLRS-----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------NEADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM-GDTS-G-ITP
+LVSTLPALLAKSCSCYNPFVYAISHPKYRLAITQHLPWFCVHE-----------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------TETKSNDDSQSN
+STVA------QDKA
+> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
+MA----------------------------------------------------------
+---------------------------------------------------NVTGP----
+------------------------------------------------------------
+--------------QMAFYGSGA-------------------------------------
+ATFGY-P------EGMTVADFVPDRVKHMVLDHWYNYPPVN------------PMWHYLL
+GVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCF
+SGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGPKL-TQGKATF
+MCGLAWVISVGW-SLPPFF-GWGS-------YTLEGILDSCSYDYFTRD--MNTITYNIC
+IFIFDFFLPASVIVFSYVFIVKAIFAHEAAMR----------------------------
+------------AQAKKMNVTNLRS-----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------NEAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL-GNAE-G-ITP
+LLTTLPALLAKSCSCYNPFVYAISHPKFRLAITQHLPWFCVHE-KDPNDVEENQSSN---
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------TQT---------
+----------QEKS
+> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
+ME----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------SFAVAAAQLG-------------------------------------------
+------PHFAP-LSNGSVVDKVTPDMAHLISPYWNQFPAMD------------PIWAKIL
+TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
+F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPM-TIPLALG
+KM-----------------------------YVPEGNLTSCGIDYLERD--WNPRSYLIF
+YSIFVYYIPLFLICYSYWFIIAAVSAHEKAMR----------------------------
+------------EQAKKMNVKSLRS-----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF-KF-E-G-LTP
+LNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFG------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------------------------------------KVDDGKSSDAQSQ
+ATAS----EAESKA
+> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
+ME----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------SFAVAAAQLG-------------------------------------------
+------PHFAP-LSNGSVVDKVTPDMAHLISPYWNQFPAMD------------PIWAKIL
+TAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
+F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPM-TIPLALG
+KIAYIWFMSSIW-CLAPAF-GWSR-------YVPEGNLTSCGIDYLERD--WNPRSYLIF
+YSIFVYYIPLFLICYSYWFIIAAVSAHEKAMR----------------------------
+------------EQAKKMNVKSLRS-----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF-KF-E-G-LTP
+LNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFG------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------------------------------------KVDDGKSSDAQSQ
+ATAS----EAESKA
+> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
+MD----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------SFAAVATQLG-------------------------------------------
+------PQFAA-PSNGSVVDKVTPDMAHLISPYWDQFPAMD------------PIWAKIL
+TAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLY
+F-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGRPM-TIPLALG
+KIAYIWFMSTIWCCLAPVF-GWSR-------YVPEGNLTSCGIDYLERD--WNPRSYLIF
+YSIFVYYIPLFLICYSYWFIIAAVSAHEKAMR----------------------------
+------------EQAKKMNVKSLRS-----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------SEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF-KF-E-G-LTP
+LNTIWGACFAKSAACYNPIVYGISHPKYRLALKEKCPCCVFG------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-----------------------------------------------KVDDGKSSEAQSQ
+ATTS----EAESKA
+> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
+MERSHLPETPFDL-----------------------------------------------
+---------------------------------------------------AHSGP----
+------------------------------------------------RFQAQ-SSG---
+------------------------------------------------------------
+--------------NGSVLDNVLPDMAHLVNPYWSRFAPMD------------PMMSKIL
+GLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY
+Y-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPM-TIKTSIM
+KILFIWMMAVFW-TVMPLI-GWSA-------YVPEGNLTACSIDYMTRM--WNPRSYLIT
+YSLFVYYTPLFLICYSYWFIIAAVAAHEKAMR----------------------------
+------------EQAKKMNVKSLRS-----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF-KI-D-G-LTP
+LTTIWGATFAKTSAVYNPIVYGISHPKYRIVLKEKCPMCVFGN---------------TD
+EPKPDAP-----------------------ASDTE-------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-TTS----EADSKA
+> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
+MERSLLPEPPLAM-----------------------------------------------
+---------------------------------------------------ALLGP----
+------------------------------------------------RFEAQ-TGG---
+------------------------------------------------------------
+--------------NRSVLDNVLPDMAPLVNPHWSRFAPMD------------PTMSKIL
+GLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFY
+Y-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGTPM-TIKTSIM
+KIAFIWMMAVFW-TIMPLI-GWSS-------YVPEGNLTACSIDYMTRQ--WNPRSYLIT
+YSLFVYYTPLFMICYSYWFIIATVAAHEKAMR----------------------------
+------------DQAKKMNVKSLRS-----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------SEDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF-KI-D-G-LTP
+LTTIWGATFAKTSAVYNPIVYGISHPNDRLVLKEKCPMCVCGT---------------TD
+EPKPDAP-----------------------PSDTE-------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-TTS----EAESKD
+> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
+MI----------------------------------------------------------
+---------------------------------------------------AVSGP----
+------------------------------------------------SYEAF-SYG---
+-------GQARFNNQ---------------------------------------------
+----------------TVVDKVPPDMLHLIDANWYQYPPLN------------PMWHGIL
+GFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCY
+Y-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGKPL-SINGALI
+RIIAIWLFSLGW-TIAPMF-GWNR-------YVPEGNMTACGTDYFNRG--LLSASYLVC
+YGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMR----------------------------
+------------EQAKKMNVASLRS-----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------SENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF-NL-V-K-ISP
+LFTIWGSLFAKANAVYNPIVYGISHPKYRAALFAKFPSLACAA-----------------
+-------EPSSDA-----------------------------------------------
+------------------------------------------------------------
+--------------------------------------------------------VSTT
+SGTTTVTDNEKSNA
+> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
+MA----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------NQLSY---SS-------------------------------------
+LGWPYQP-------NASVVDTMPKEMLYMIHEHWYAFPPMN------------PLWYSIL
+GVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCF
+A-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAAPL-THKKATL
+LLLFVWIWSGGW-TILPFF-GWSR-------YVPEGNLTSCTVDYLTKD--WSSASYVVI
+YGLAVYFLPLITMIYCYFFIVHAVAEHEKQLR----------------------------
+------------EQAKKMNVASLRAN----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+----------ADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-SSGT-R-LTP
+LATIWGSVFAKANSCYNPIVYGISHPRYKAALYQRFPSLACGSGESGSDVKSEASATTTM
+EEKPKIPE----------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------------A
+> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
+MV----------------------------------------------------------
+---------------------------------------------------EST------
+------------------------------------------------------------
+-----------------------TLVN---------------------------------
+QTWWYNPTV-------------------DIHPHWAKFDPIP------------DAVYYSV
+GIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAF
+M-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFL
+MIIFVWMWSIVW-SVGPVF-NWGA-------YVPEGILTSCSFDYLSTD--PSTRSFILC
+MYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMA----------------------------
+------------AMAKRLNAKELRKA----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------QAGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF-GPAE-W-VTP
+YAAELPVLFAKASAIHNPIVYSVSHPKFREAIQTTFPWL---------------------
+------------LTCCQFDEKE--CED--------------------ANDAEEEVVASER
+GGESR----------DAAQMKEMMAMMQKMQAQQAAYQP---PPPPQGYPPQGYPPQGAY
+PPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQ-------------
+-------------A
+> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
+MGRD--------------------------------------------------------
+-------------------------------LRD--------------------------
+------------------------------------------------------------
+--------------------------N---------------------------------
+ETWWYNPSI-------------------VVHPHWREFDQVP------------DAVYYSL
+GIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCF
+L-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFI
+MIIFVWLWSVLW-AIGPIF-GWGA-------YTLEGVLCNCSFDYISRD--STTRSNILC
+MFILGFFGPILIIFFCYFNIVMSVSNHEKEMA----------------------------
+------------AMAKRLNAKELRKA----------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------QAGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-GPLE-W-VTP
+YAAQLPVMFAKASAIHNPMIYSVSHPKFREAISQTFPWV---------------------
+------------LTCCQFDDKE--TEDDK-DAETEIPAGESSDAAPSA------------
+---------------DAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGY-
+-PPQGYPPQGYPP---PPQGAPPQGAPP------AAPPQGVDNQAYQ-------------
+-------------A
+> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
+MMDV--------------------------------------------------------
+----------------NSSG--R--PDLYGHLRSFLLPEVGRGLP-------DLSPDG--
+----------------------------GADPVA--------------------------
+------------------------------------------------------------
+GSW--APHLLS---EVTASPAPT----------W--DAPPDNASGCGEQINYGRVEKVVI
+GSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
+IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
+MILSVWLLSASI-TLPPLF-GWAQ-------NVNDDKVCLISQDF----------GYTIY
+STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP--------------------------
+------------------------------------------------------------
+------------------------------------------------------GFPRVE
+PDSVI---------------ALNGIV----------------------------------
+--------------KLQKEV------------------EECAN-----------------
+-LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPL
+WVERTFLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ-----------------------
+-------------------CQYRNINRKL-SA----------------------------
+-----------------AGMHEALKLAERPER----------------------------
+--------------------------------------------------------PEFV
+LQNADYCRKKGHDS
+> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
+M-----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------PHLLSGFLEVTASPAPT----------W--DAPPDNVSGCGEQINYGRVEKVVI
+GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
+IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
+MILSVWLLSASI-TLPPLF-GWAQ-------NVNDDKVCLISQDF----------GYTIY
+STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP--------------------------
+------------------------------------------------------------
+------------------------------------------------------GFPRVQ
+PESVI---------------SLNGVV----------------------------------
+--------------KLQKEV------------------EECAN-----------------
+-LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPL
+WVERTCLWLGYANSLINPFIYAFFNRDLRPTSRSLLQ-----------------------
+-------------------CQYRNINRKL-SA----------------------------
+-----------------AGMHEALKLAERPER----------------------------
+--------------------------------------------------------SEFV
+LQNSDHCGKKGHDT
+> 31=p A47425 serotonin receptor 5HT-7 - rat
+M-----------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------PHLLSGFLEVTASPAPT----------W--DAPPDNVSGCGEQINYGRVEKVVI
+GSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDL
+IGGKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAK
+MILSVWLLSASI-TLPPLF-GWAQ-------NVNDDKVCLISQDF----------GYTIY
+STAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFP--------------------------
+------------------------------------------------------------
+------------------------------------------------------GFPRVQ
+PESVI---------------SLNGVV----------------------------------
+--------------KLQKEV------------------EECAN-----------------
+-LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFICGTSCSCIPL
+WVERTCLWLGYANSLINPFIYAFFNRDLRTTYRSLLQ-----------------------
+-------------------CQYRNINRKL-SA----------------------------
+-----------------AGMHEALKLAERPER----------------------------
+--------------------------------------------------------SEFV
+LQNSDHCGKKGHDT
+> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
+MDVLSPG-----------------------------------------------------
+------------------------------------------------------------
+---------------------------QGNNTTS--------------PPAPFETGG---
+------------------------------------------------------------
+--------------NTTGISDVT------------------------------VSYQVIT
+SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV
+L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA
+LISLTWLIGFLI-SIPPML-GWRTP--EDRSDPD---ACTISKDH----------GYTIY
+STFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------------
+---------TVKKVEK----------------------------------------TGAD
+TRHGASPAPQPKKS--------------------------------------------VN
+GESGSRNWRLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTP
+CAPASFERKNER-------NAEA-------------------------------------
+----------KRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCH-MPT
+LLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK-----------------------
+-------------------CKF--------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------CR--Q
+> 33=p A35181 serotonin receptor class 1A - rat
+MDVFSFG-----------------------------------------------------
+------------------------------------------------------------
+---------------------------QGNNTTA--------------SQEPFGTGG---
+------------------------------------------------------------
+--------------NVTSISDVT------------------------------FSYQVIT
+SLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQV
+L-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAA
+LISLTWLIGFLI-SIPPML-GWRTP--EDRSDPD---ACTISKDH----------GYTIY
+STFGAFYIPLLLMLVLYGRIFRAARFRIRK------------------------------
+---------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDWRRCAENRAVGTP----
+------------------------------------------------------------
+--------------------CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNS
+YAPACLERKNER-------NAEA-------------------------------------
+----------KRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-CESSCH-MPA
+LLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFKKIIK-----------------------
+-------------------CKF--------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------CR--R
+> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
+MANFTFGDLALDVARMGGLAS---------------------------------------
+----------------TPSG----------------LRSTGLTTP-------GLSPTGLV
+TSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHE---------
+------------------------------------------------------------
+-------------------------------------------------------HLVLT
+SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI
+S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL
+MIMVVWIVALFI-SIPPLF-GWRDP--NN--DPDKTGTCIISQDK----------GYTIF
+STVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSD
+CNGCNSPDSTTEKKKRRAPFKS--------------------------------------
+--YGCSPRPERKKNRAKKLPENAN---------------------------------GVN
+SNSSS-------------------------------------------------------
+---------SERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS-----
+--------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE-G-IPP
+FARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF-----------------------
+-------------------GKY--------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------RRGHR
+> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
+MANFTFGDLALDVARMGGLAS---------------------------------------
+----------------TPSG----------------LRSTGLTTP-------GLSPTGLV
+TSDFNDSYGLTGQFINGSHSSRSRDNASANDTSATNMTDDRYWSLTVYSHE---------
+------------------------------------------------------------
+-------------------------------------------------------HLVLT
+SVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEI
+S-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILL
+MIMVVWIVALFI-SIPPLF-GWRDP--NN--DPDKTGTCIISQDK----------GYTIF
+STVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSD
+CNGCNSPDSTTEKKKRRAPFKS--------------------------------------
+--YGCSPRPERKKNRAKKLPENAN---------------------------------GVN
+SNSSS-------------------------------------------------------
+---------SERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSNDTPYS-----
+--------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-VDPE-G-IPP
+FARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQKILF-----------------------
+-------------------GKY--------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+---------RRGHR
+> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
+ME----------------------------------------------------------
+------------------------------------------------------------
+-----------------------------------------------------GAEGQEE
+LDWEALY---------------------------------------LRLPLQNCSWN--S
+TGW--EPNW-----NVTVVPNTTW---------WQASAPFDTPAA--------LVRAAAK
+AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEV
+V-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGM
+MIACVWTVSFFV-CIAQLL-GWKDPDWNQ--RVSEDLRCVVSQDV----------GYQIF
+ATASSFYVPVLIILILYWRIYQTARKRIRRRR--G-------------------------
+----------------------------------------------------------AT
+ARGGVGPPPV----P----AGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVS
+SNNTS-------------------------------------------------------
+---------PEK--------------------------QSCANGLEADPPTTGYGAVAAA
+YYPSLVRRKPKEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-CD--CE-VSP
+VLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQRLLC-----------------------
+-------------------GRRV-------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+-------RRRRAPQ
> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-MN--------------------GTE--GDNFYVPFSNKTG--LARSP-------------
---------------------YEYPQY-YLAEPW---------KYSA-----LAAYMFFLI
-LVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--
-FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIM
-ALAC-AAPPLV-GWS-----RYIPEGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPF
-VIIFFCYGRLLCTVKEAAAAQQESA-----------------------------------
-------------------------------------------------------------
-----------------------------------------------STQKAEKEVTRMVV
-LMVIGFLVCWVPYASVAFYIFT--HQGSD-FGATFMTLPAFFAKSSALYNPVIYILMNKQ
-FRNCMITTL-----CCGKNPLGDDE-SG-ASTSKTEVS----------------------
-------------------------------------------------------------
-----------------SVS-TSPVSPA---
+-------------------------MNGTE--G-------------------DNFYVPFS
+NKTG--------------------------------LARSPYEYPQY-------------
+---YLAEPW---------KYSA-----LAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPL
+NYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSL
+VVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLV-GWS-----RYIP
+EGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQES
+A-----------------------------------------------------------
+------------------------------------------------------------
+----------------------STQKAEKEVTRMVVLMVIGFLVCWVPYASVAFYIFT-H
+QGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMITTLCC------GKNPLGD
+DE-SG-ASTSK-TEVSSVS--TSPVSPA--------------------------------
+-----------------------------------------------------------
> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
-MN--------------------GTE--GPNFYVPFSNITG--VVRSP-------------
---------------------FEQPQY-YLAEPW---------QFSM-----LAAYMFLLI
-VLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--
-FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVM
-ALAC-AAPPLV-GWS-----RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPM
-IVIFFCYGQLVFTVKEAAAQQQESA-----------------------------------
-------------------------------------------------------------
-----------------------------------------------TTQKAEKEVTRMVI
-IMVIFFLICWLPYASVAMYIFT--HQGSN-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQ
-FRNCMLTSL-----CCGKNPLGDDE-AS-ATASKTETS----------------------
-------------------------------------------------------------
-----------------QVA-PA--------
+-------------------------MNGTE--G-------------------PNFYVPFS
+NITG--------------------------------VVRSPFEQPQY-------------
+---YLAEPW---------QFSM-----LAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPL
+NYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSL
+VVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLV-GWS-----RYIP
+EGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQES
+A-----------------------------------------------------------
+------------------------------------------------------------
+----------------------TTQKAEKEVTRMVIIMVIFFLICWLPYASVAMYIFT-H
+QGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCMLTSLCC------GKNPLGD
+DE-AS-ATASK-TETSQVA--PA-------------------------------------
+-----------------------------------------------------------
> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
-MN--------------------GTE--GINFYVPMSNKTG--VVRSP-------------
---------------------FEYPQY-YLAEPW---------KYRL-----VCCYIFFLI
-STGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--
-FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVM
-AFSC-AAPPLF-GWS-----RYMPEGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPV
-VVIFFSYGRLICKVREAAAQQQESA-----------------------------------
-------------------------------------------------------------
-----------------------------------------------TTQKAEKEVTRMVI
-LMVLGFMLAWTPYAVVAFWIFT--NKGAD-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQ
-FRNCMITTI-----CCGKNPFGDEDVSSTVSQSKTEVS----------------------
-------------------------------------------------------------
-----------------SVS-SSQVSPA---
+-------------------------MNGTE--G-------------------INFYVPMS
+NKTG--------------------------------VVRSPFEYPQY-------------
+---YLAEPW---------KYRL-----VCCYIFFLISTGLPINLLTLLVTFKHKKLRQPL
+NYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSL
+VVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLF-GWS-----RYMP
+EGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQES
+A-----------------------------------------------------------
+------------------------------------------------------------
+----------------------TTQKAEKEVTRMVILMVLGFMLAWTPYAVVAFWIFT-N
+KGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMITTICC------GKNPFGD
+EDVSSTVSQSK-TEVSSVS--SSQVSPA--------------------------------
+-----------------------------------------------------------
> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-MN--------------------GTE--GKNFYVPMSNRTG--LVRSP-------------
---------------------FEYPQY-YLAEPW---------QFKI-----LALYLFFLM
-SMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--
-LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVM
-ALAC-AAPPLF-GWS-----RYIPEGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPV
-AVIFFTYGRLVCTVKAAAAQQQDSA-----------------------------------
-------------------------------------------------------------
-----------------------------------------------STQKAEREVTKMVI
-LMVFGFLIAWTPYATVAAWIFF--NKGAD-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQ
-FRNCMLTTI-----FCGKNPLGDDE-SSTVSTSKTEVS----------------------
-------------------------------------------------------------
-----------------SVS-PA--------
+-------------------------MNGTE--G-------------------KNFYVPMS
+NRTG--------------------------------LVRSPFEYPQY-------------
+---YLAEPW---------QFKI-----LALYLFFLMSMGLPINGLTLVVTAQHKKLRQPL
+NFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSL
+VVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLF-GWS-----RYIP
+EGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDS
+A-----------------------------------------------------------
+------------------------------------------------------------
+----------------------STQKAEREVTKMVILMVFGFLIAWTPYATVAAWIFF-N
+KGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCMLTTIFC------GKNPLGD
+DE-SSTVSTSK-TEVSSVS--PA-------------------------------------
+-----------------------------------------------------------
> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-MN--------------------GTE--GNNFYVPLSNRTG--LVRSP-------------
---------------------FEYPQY-YLAEPW---------QFKL-----LAVYMFFLI
-CLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--
-LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVM
-AMAC-AAPPLV-GWS-----RYIPEGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPV
-TIIFFTYGRLVCTVKAAAAQQQDSA-----------------------------------
-------------------------------------------------------------
-----------------------------------------------STQKAEREVTKMVI
-LMVLGFLVAWTPYATVAAWIFF--NKGAA-FSAQFMAIPAFFSKTSALYNPVIYVLLNKQ
-FRSCMLTTL-----FCGKNPLGDEE-SSTVSTSKTEVS----------------------
-------------------------------------------------------------
-----------------SVS-PA--------
+-------------------------MNGTE--G-------------------NNFYVPLS
+NRTG--------------------------------LVRSPFEYPQY-------------
+---YLAEPW---------QFKL-----LAVYMFFLICLGLPINGLTLICTAQHKKLRQPL
+NFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSL
+VVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLV-GWS-----RYIP
+EGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDS
+A-----------------------------------------------------------
+------------------------------------------------------------
+----------------------STQKAEREVTKMVILMVLGFLVAWTPYATVAAWIFF-N
+KGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCMLTTLFC------GKNPLGD
+EE-SSTVSTSK-TEVSSVS--PA-------------------------------------
+-----------------------------------------------------------
> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
-MK--------------------QVPEFHEDFYIPIPLDINNLSAYSP-------------
---------------------FLVPQD-HLGNQG---------IFMA-----MSVFMFFIF
-IGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--
-FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGNF-TFKTPHAIAGCILPWIS
-ALAA-SLPPLF-GWS-----RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPF
-GTIVFCYGQLLITLKLAAKAQADSA-----------------------------------
-------------------------------------------------------------
-----------------------------------------------STQKAEREVTKMVV
-VMVLGFLVCWAPYASFSLWIVS--HRGEE-FDLRMATIPSCLSKASTVYNPVIYVLMNKQ
-FRSCMM-KM-----VCGKN-IEEDE-AS-TSSQVTQVS----------------------
-------------------------------------------------------------
-----------------SVA-PEK-------
+-------------------------MKQVPEFH-------------------EDFYIPIP
+LDIN------------------------------NLSAYSPFLVPQD-------------
+---HLGNQG---------IFMA-----MSVFMFFIFIGGASINILTILCTIQFKKLRSHL
+NYILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSL
+AVVAFERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLF-GWS-----RYIP
+EGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADS
+A-----------------------------------------------------------
+------------------------------------------------------------
+----------------------STQKAEREVTKMVVVMVLGFLVCWAPYASFSLWIVS-H
+RGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM-KMVC------GKN-IEE
+DE-AS-TSSQV-TQVSSVA--PEK------------------------------------
+-----------------------------------------------------------
> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-MR--------------------KMS--EEEFYL-----FKNISSVGP-------------
---------------------WDGPQY-HIAPVW---------AFYL-----QAAFMGTVF
-LIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--
-FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGNF-RFSSKHALTVVLATWTI
-GIGV-SIPPFF-GWS-----RFIPEGLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPL
-SLICFSYTQLLRALKAVAAQQQESA-----------------------------------
-------------------------------------------------------------
-----------------------------------------------TTQKAEREVSRMVV
-VMVGSFCVCYVPYAAFAMYMVN--NRNHG-LDLRLVTIPSFFSKSACIYNPIIYCFMNKQ
-FQACIM-KM-----VCGKA-MTDES-DT-CSSQKTEVS----------------------
-------------------------------------------------------------
-----------------TVS-STQVGPN---
+-------------------------MRKMS--E-------------------EEFYL---
+--FK------------------------------NISSVGPWDGPQY-------------
+---HIAPVW---------AFYL-----QAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPL
+NYILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSL
+AFLAFERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFF-GWS-----RFIP
+EGLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQES
+A-----------------------------------------------------------
+------------------------------------------------------------
+----------------------TTQKAEREVSRMVVVMVGSFCVCYVPYAAFAMYMVN-N
+RNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM-KMVC------GKA-MTD
+ES-DT-CSSQK-TEVSTVS--STQVGPN--------------------------------
+-----------------------------------------------------------
> 8=opsin, greensensitive human (fragment) S07060
------------------------------------------------------------
------------------------------------------------------------
-----------------------------------DLAETVIA-STISIVNQVS-GYFV--
-LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIW
-AAVW-TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPL
-SIIVLCYLQVWLAIRAVAKQQKESE-----------------------------------
------------------------------------------------------------
-----------------------------------------------STQKAEKEVTRMVV
-VMVLAFC-----------------------------------------------------
+----------DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSL
+AIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWP
+HGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKES
+E-----------------------------------------------------------
------------------------------------------------------------
+----------------------STQKAEKEVTRMVVVMVLAFC-----------------
------------------------------------------------------------
-------------------------------
+------------------------------------------------------------
+-----------------------------------------------------------
> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-MAQQWS-LQRLAGRHPQDSYEDSTQ--SSIFTY-----TNSNSTRGP-------------
---------------------FEGPNY-HIAPRW---------VYHL-----TSVWMIFVV
-IASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQVY-GYFV--
-LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIW
-AAVW-TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPL
-SIIVLCYLQVWLAIRAVAKQQKESE-----------------------------------
-------------------------------------------------------------
-----------------------------------------------STQKAEKEVTRMVV
-VMVLAFCFCWGPYAFFACFAAA--NPGYP-FHPLMAALPAFFAKSATIYNPVIYVFMNRQ
-FRNCILQLF----------GKKVDDGSELSSASKTEVS----------------------
-------------------------------------------------------------
-----------------SV---SSVSPA---
+MAQQWS-LQRLAGRHPQDS-----YEDSTQ--S-------------------SIFTYTNS
+N-----------------------------------STRGPFEGPNY-------------
+---HIAPRW---------VYHL-----TSVWMIFVVIASVFTNGLVLAATMKFKKLRHPL
+NWILVNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSL
+AIISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWP
+HGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKES
+E-----------------------------------------------------------
+------------------------------------------------------------
+----------------------STQKAEKEVTRMVVVMVLAFCFCWGPYAFFACFAAA-N
+PGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL-QLF-------GKK-VDD
+GS-EL-SSASK-TEVSSV----SSVSPA--------------------------------
+-----------------------------------------------------------
> 10== Z68193 1 human Red Opsin <>[]
-MAQQWS-LQRLAGRHPQDSYEDSTQ--SSIFTY-----TNSNSTRGP-------------
---------------------FEGPNY-HIAPRW---------VYHL-----TSVWMIFVV
-TASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQVS-GYFV--
-LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIW
-SAVW-TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPL
-AIIMLCYLQVWLAIRAVAKQQKESE-----------------------------------
-------------------------------------------------------------
-----------------------------------------------STQKAEKEVTRMVV
-VMIFAYCVCWGPYTFFACFAAA--NPGYA-FHPLMAALPAYFAKSATIYNPVIYVFMNRQ
-FRNCILQLF----------GKKVDDGSELSSASKTEVS----------------------
-------------------------------------------------------------
-----------------SV---SSVSPA---
+MAQQWS-LQRLAGRHPQDS-----YEDSTQ--S-------------------SIFTYTNS
+N-----------------------------------STRGPFEGPNY-------------
+---HIAPRW---------VYHL-----TSVWMIFVVTASVFTNGLVLAATMKFKKLRHPL
+NWILVNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSL
+AIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIF-GWS-----RYWP
+HGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKES
+E-----------------------------------------------------------
+------------------------------------------------------------
+----------------------STQKAEKEVTRMVVVMIFAYCVCWGPYTFFACFAAA-N
+PGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL-QLF-------GKK-VDD
+GS-EL-SSASK-TEVSSV----SSVSPA--------------------------------
+-----------------------------------------------------------
> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
-MTEAWNVAVFAARRSRDD--DDTTR--GSVFTY-----TNTNNTRGP-------------
---------------------FEGPNY-HIAPRW---------VYNL-----VSFFMIIVV
-IASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQIF-GYFI--
-LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVW
-AWGW-SAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPL
-FIIIVCYLQVWMAIRAVAAQQKESE-----------------------------------
-------------------------------------------------------------
-----------------------------------------------STQKAEREVSRMVV
-VMIVAFCICWGPYASFVSFAAA--NPGYA-FHPLAAALPAYFAKSATIYNPVIYVFMNRQ
-FRNCIMQLF----------GKKVDDGSEASTTSRTEVS----------------------
-------------------------------------------------------------
-----------------SVS-NSSVAPA---
+MTEAWNVAVFAARRSRDD-------DDTTR--G-------------------SVFTYTNT
+N-----------------------------------NTRGPFEGPNY-------------
+---HIAPRW---------VYNL-----VSFFMIIVVIASCFTNGLVLVATAKFKKLRHPL
+NWILVNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSL
+AIISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIF-GWS-----RYWP
+HGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKES
+E-----------------------------------------------------------
+------------------------------------------------------------
+----------------------STQKAEREVSRMVVVMIVAFCICWGPYASFVSFAAA-N
+PGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM-QLF-------GKK-VDD
+GS-EA-STTSR-TEVSSVS--NSSVAPA--------------------------------
+-----------------------------------------------------------
> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
-MA-AWE-AAFAARRRHEE--EDTTR--DSVFTY-----TNSNNTRGP-------------
---------------------FEGPNY-HIAPRW---------VYNL-----TSVWMIFVV
-AASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQIS-GYFI--
-LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLW
-SCAW-TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPL
-AIIILCYLQVWLAIRAVAAQQKESE-----------------------------------
-------------------------------------------------------------
-----------------------------------------------STQKAEKEVSRMVV
-VMIVAYCFCWGPYTFFACFAAA--NPGYA-FHPLAAALPAYFAKSATIYNPIIYVFMNRQ
-FRNCILQLF----------GKKVDDGSEVST-SRTEVS----------------------
-------------------------------------------------------------
-----------------SVS-NSSVSPA---
+MA-AWE-AAFAARRRHEE-------EDTTR--D-------------------SVFTYTNS
+N-----------------------------------NTRGPFEGPNY-------------
+---HIAPRW---------VYNL-----TSVWMIFVVAASVFTNGLVLVATWKFKKLRHPL
+NWILVNLAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSL
+AIISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIF-GWS-----RYWP
+HGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKES
+E-----------------------------------------------------------
+------------------------------------------------------------
+----------------------STQKAEKEVSRMVVVMIVAYCFCWGPYTFFACFAAA-N
+PGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL-QLF-------GKK-VDD
+GS-EV-ST-SR-TEVSSVS--NSSVSPA--------------------------------
+-----------------------------------------------------------
> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-MS--------------------------SNSSQ-----APPNGTPGP-------------
---------------------FDGPQWPYQAPQS---------TYVG-----VAVLMGTVV
-ACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--
-FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGW
-ALLW-SAPPLL-GWS-----SYVPEGLRTSCGPNWYTGGSN--NNSYILSLFVTCFVLPL
-SLILFSYTNLLLTLRAAAAQQKEAD-----------------------------------
-------------------------------------------------------------
-----------------------------------------------TTQRAEREVTRMVI
-VMVMAFLLCWLPYSTFALVVAT--HKGII-IQPVLASLPSYFSKTATVYNPIIYVFMNKQ
-FQSCLLEML-----CCGYQPQRTGKASPGTPGPHADVT----------------------
-------------------------------------------------------------
-----------------AAGLRNKVMPAHPV
+-----------------------------M--S-------------------SNSSQAPP
+N-----------------------------------GTPGPFDGPQW-------------
+--PYQAPQS---------TYVG-----VAVLMGTVVACASVVNGLVIVVSICYKKLRSPL
+NYILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSL
+AILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLL-GWS-----SYVP
+EGLRTSCGPNWYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEA
+D-----------------------------------------------------------
+------------------------------------------------------------
+----------------------TTQRAEREVTRMVIVMVMAFLLCWLPYSTFALVVAT-H
+KGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL-EMLCCG--YQPQR-TGK
+AS-PG-TPGPH-ADVTAAG-LRNKVMPAHPV-----------------------------
+-----------------------------------------------------------
> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-MESGNVSSSLFGNVSTALRPE----ARLSA---E---TRLLGWNVPP-------------
---------------------EELR---HIPEHWLTYPEPPESMNYL-----LGTLYIFFT
-LMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSFH-QGYA--
-LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMY
-ATPW-VVACYTETWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPT
-TMITYYYSQIVGHVFSHEKALRDQAKKMNVESLR--------------------------
-------------------------------------------------------------
------------------------------------------SNVDKNKETAEIRIAKAAI
-TICFLFFCSWTPYGVMSLIGAF--GDKTL-LTPGATMIPACACKMVACIDPFVYAISHPR
-YRMELQKRCPWL--ALNEKAPESSAVASTST-TQEPQQT---------------------
-------------------------------------------------------------
-----------------TAA-----------
+----------------MESGNVSSSLFGNV--S-------------------TALR----
+---P--------------------------------EARLSA---E---TRLLGWNVPPE
+ELRHIPEHWLTYPEPPESMNYL-----LGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPS
+NILVINLAFCDFMMM-VK-TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATN
+AFIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVP
+EGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQ
+AKKM------------------NVESLRS-------------------------------
+------------------------------------------------------------
+------------------NVDKNKETAEIRIAKAAITICFLFFCSWTPYGVMSLIGAF-G
+DKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQ-KRCPWL--ALNEK-APE
+SS-AV-ASTST-TQEPQQT----TAA----------------------------------
+-----------------------------------------------------------
> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-MEYHNVSSVL-GNVSSVLRPD----ARLSA---E---SRLLGWNVPP-------------
---------------------DELR---HIPEHWLIYPEPPESMNYL-----LGTLYIFFT
-VISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSFH-QGYA--
-LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLY
-ATPW-VVACYTESWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPT
-TMITYYYSQIVGHVFSHEKALRDQAKKMNVDSLR--------------------------
-------------------------------------------------------------
------------------------------------------SNVDKSKEAAEIRIAKAAI
-TICFLFFASWTPYGVMSLIGAF--GDKTL-LTPGATMIPACTCKMVACIDPFVYAISHPR
-YRMELQKRCPWL--AISEKAPESRAAISTST-TQEQQQT---------------------
-------------------------------------------------------------
-----------------TAA-----------
+----------------MEYHNVSSVL-GNV--S-------------------SVLR----
+---P--------------------------------DARLSA---E---SRLLGWNVPPD
+ELRHIPEHWLIYPEPPESMNYL-----LGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPS
+NILVINLAFCDFMMM-IK-TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATN
+AFIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVP
+EGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQ
+AKKM------------------NVDSLRS-------------------------------
+------------------------------------------------------------
+------------------NVDKSKEAAEIRIAKAAITICFLFFASWTPYGVMSLIGAF-G
+DKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQ-KRCPWL--AISEK-APE
+SR-AA-ISTST-TQEQQQT----TAA----------------------------------
+-----------------------------------------------------------
> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-ME------PLCNASEPPLRPE----AR-SSGNGD---LQFLGWNVPP-------------
---------------------DQIQ---YIPEHWLTQLEPPASMHYM-----LGVFYIFLF
-CASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSFH-RGFAIY
-LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLY
-CTPW-VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPT
-LMILYYYSQIVGHVFSHEKALREQAKKMNVESLR--------------------------
-------------------------------------------------------------
------------------------------------------SNVDKSKETAEIRIAKAAI
-TICFLFFVSWTPYGVMSLIGAF--GDKSL-LTQGATMIPACTCKLVACIDPFVYAISHPR
-YRLELQKRCPWL--GVNEKSGEISSAQSTTT-QEQQQTT---------------------
-------------------------------------------------------------
-----------------AA------------
+----------------------MEPLCNAS--E-------------------PPLR----
+---P--------------------------------EAR-SSGNGD---LQFLGWNVPPD
+QIQYIPEHWLTQLEPPASMHYM-----LGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPS
+NMFVLNLAVFDLIMC-LK-APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTN
+AAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVP
+EGYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQ
+AKKM------------------NVESLRS-------------------------------
+------------------------------------------------------------
+------------------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G
+DKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQ-KRCPWL--GVNEK-SGE
+IS-SA-QSTTT-QEQQQTT----AA-----------------------------------
+-----------------------------------------------------------
> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-MD------ALCNASEPPLRPE----ARMSSGSDE---LQFLGWNVPP-------------
---------------------DQIQ---YIPEHWLTQLEPPASMHYM-----LGVFYIFLF
-FASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSFH-RGFA--
-LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLY
-CTPW-VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPT
-LMILYYYSQIVGHVFNHEKALREQAKKMNVESLR--------------------------
-------------------------------------------------------------
------------------------------------------SNVDKSKETAEIRIAKAAI
-TICFLFFVSWTPYGVMSLIGAF--GDKSL-LTPGATMIPACTCKLVACIEPFVYAISHPR
-YRMELQKRCPWL--GVNEKSGEASSAQSTTT-QEQTQQT---------------------
-------------------------------------------------------------
-----------------SAA-----------
+----------------------MDALCNAS--E-------------------PPLR----
+---P--------------------------------EARMSSGSDE---LQFLGWNVPPD
+QIQYIPEHWLTQLEPPASMHYM-----LGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPS
+NMFVLNLAVFDLIMC-LK-APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTN
+AAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVP
+EGYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQ
+AKKM------------------NVESLRS-------------------------------
+------------------------------------------------------------
+------------------NVDKSKETAEIRIAKAAITICFLFFVSWTPYGVMSLIGAF-G
+DKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQ-KRCPWL--GVNEK-SGE
+AS-SA-QSTTT-QEQTQQT----SAA----------------------------------
+-----------------------------------------------------------
> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
-M-----------TNATGPQMAYYGAASMDFGYPE---GVSIVDFVRP-------------
---------------------EIKP---YVHQHWYNYPPVNPMWHYL-----LGVIYLFLG
-TVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--
-FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGP-KLTTGKAVVFALISWVI
-AIGC-ALPPFF-GWG-----NYILEGILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPA
-AIIVFSYVFIVKAIFAHEAAMRAQAKKMNVSTLR--------------------------
-------------------------------------------------------------
------------------------------------------S-NEADAQRAEIRIAKTAL
-VNVSLWFICWTPYALISLKGVM--GDTSG-ITPLVSTLPALLAKSCSCYNPFVYAISHPK
-YRLAITQHLPWF--CVHETETKSNDDSQSNS-TVAQDK----------------------
-------------------------------------------------------------
-------------------A-----------
+-------------------------MTNAT--G-------------------PQMAY---
+--YG--------------------------------AASMDFGYPE---GVSIVDFVRPE
+IKPYVHQHWYNYPPVNPMWHYL-----LGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPA
+NILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLL
+CMISFDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFF-GWG-----NYIL
+EGILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQ
+AKKM------------------NVSTLRS-------------------------------
+------------------------------------------------------------
+-------------------NEADAQRAEIRIAKTALVNVSLWFICWTPYALISLKGVM-G
+DTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAIT-QHLPWF--CVHET-ETK
+SN-DD-SQSNS-TVAQDKA-----------------------------------------
+-----------------------------------------------------------
> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
-M-----------ANVTGPQMAFYGSGAATFGYPE---GMTVADFVPD-------------
---------------------RVKH---MVLDHWYNYPPVNPMWHYL-----LGVVYLFLG
-VISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--
-FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVI
-SVGW-SLPPFF-GWG-----SYTLEGILDSCSYDYFTRD--MNTITYNICIFIFDFFLPA
-SVIVFSYVFIVKAIFAHEAAMRAQAKKMNVTNLR--------------------------
-------------------------------------------------------------
------------------------------------------S-NEAETQRAEIRIAKTAL
-VNVSLWFICWTPYAAITIQGLL--GNAEG-ITPLLTTLPALLAKSCSCYNPFVYAISHPK
-FRLAITQHLPWF--CVHEKDPNDVEENQSSN-TQTQEK----------------------
-------------------------------------------------------------
-------------------S-----------
+-------------------------MANVT--G-------------------PQMAF---
+--YG--------------------------------SGAATFGYPE---GMTVADFVPDR
+VKHMVLDHWYNYPPVNPMWHYL-----LGVVYLFLGVISIAGNGLVIYLYMKSQALKTPA
+NMLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTL
+CMISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFF-GWG-----SYTL
+EGILDSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQ
+AKKM------------------NVTNLRS-------------------------------
+------------------------------------------------------------
+-------------------NEAETQRAEIRIAKTALVNVSLWFICWTPYAAITIQGLL-G
+NAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAIT-QHLPWF--CVHEK-DPN
+DV-EE-NQSSN-TQTQEKS-----------------------------------------
+-----------------------------------------------------------
> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
-ME-----SFAVAAAQLGPHFA-----PLS--------NGSVVDKVTP-------------
---------------------DMAH---LISPYWNQFPAMDPIWAKI-----LTAYMIMIG
-MISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--
-LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKM------
----------------------YVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPL
-FLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR--------------------------
-------------------------------------------------------------
------------------------------------------S-SEDAEKSAEGKLAKVAL
-VTITLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPK
-YRLALKEKCPCC--VFGKVDDGKSSDAQSQA-TASEAE----------------------
-------------------------------------------------------------
-----------------SKA-----------
+----------------MES---FAVAAAQL--G-------------------PHFA----
+--------------------------------------PLS--------NGSVVDKVTPD
+MAHLISPYWNQFPAMDPIWAKI-----LTAYMIMIGMISWCGNGVVIYIFATTKSLRTPA
+NLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSM
+CMISLDRYQVIVKGMAGR-PMTIPLALGKM---------------------------YVP
+EGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQ
+AKKM------------------NVKSLRS-------------------------------
+------------------------------------------------------------
+-------------------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K
+F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VFGKV-DDG
+KS-SD-AQSQA-TASEAES----KA-----------------------------------
+-----------------------------------------------------------
> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
-ME-----SFAVAAAQLGPHFA-----PLS--------NGSVVDKVTP-------------
---------------------DMAH---LISPYWNQFPAMDPIWAKI-----LTAYMIMIG
-MISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--
-LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFM
-SSIW-CLAPAF-GWS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPL
-FLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR--------------------------
-------------------------------------------------------------
------------------------------------------S-SEDAEKSAEGKLAKVAL
-VTITLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPK
-YRLALKEKCPCC--VFGKVDDGKSSDAQSQA-TASEAE----------------------
-------------------------------------------------------------
-----------------SKA-----------
+----------------MES---FAVAAAQL--G-------------------PHFA----
+--------------------------------------PLS--------NGSVVDKVTPD
+MAHLISPYWNQFPAMDPIWAKI-----LTAYMIMIGMISWCGNGVVIYIFATTKSLRTPA
+NLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSM
+CMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAF-GWS-----RYVP
+EGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQ
+AKKM------------------NVKSLRS-------------------------------
+------------------------------------------------------------
+-------------------SEDAEKSAEGKLAKVALVTITLWFMAWTPYLVINCMGLF-K
+F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VFGKV-DDG
+KS-SD-AQSQA-TASEAES----KA-----------------------------------
+-----------------------------------------------------------
> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
-MD-----SFAAVATQLGPQFA-----APS--------NGSVVDKVTP-------------
---------------------DMAH---LISPYWDQFPAMDPIWAKI-----LTAYMIIIG
-MISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--
-LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFM
-STIWCCLAPVF-GWS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPL
-FLICYSYWFIIAAVSAHEKAMREQAKKMNVKSLR--------------------------
-------------------------------------------------------------
------------------------------------------S-SEDADKSAEGKLAKVAL
-VTISLWFMAWTPYLVINCMGLF--KF-EG-LTPLNTIWGACFAKSAACYNPIVYGISHPK
-YRLALKEKCPCC--VFGKVDDGKSSEAQSQA-TTSEAE----------------------
-------------------------------------------------------------
-----------------SKA-----------
+----------------MDS---FAAVATQL--G-------------------PQFA----
+--------------------------------------APS--------NGSVVDKVTPD
+MAHLISPYWDQFPAMDPIWAKI-----LTAYMIIIGMISWCGNGVVIYIFATTKSLRTPA
+NLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSM
+CMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVF-GWS-----RYVP
+EGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQ
+AKKM------------------NVKSLRS-------------------------------
+------------------------------------------------------------
+-------------------SEDADKSAEGKLAKVALVTISLWFMAWTPYLVINCMGLF-K
+F-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK-EKCPCC--VFGKV-DDG
+KS-SE-AQSQA-TTSEAES----KA-----------------------------------
+-----------------------------------------------------------
> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-MERSHLPETPFDLAHSGPRFQ-----AQSSG------NGSVLDNVLP-------------
---------------------DMAH---LVNPYWSRFAPMDPMMSKI-----LGLFTLAIM
-IISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--
-LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMM
-AVFW-TVMPLI-GWS-----AYVPEGNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPL
-FLICYSYWFIIAAVAAHEKAMREQAKKMNVKSLR--------------------------
-------------------------------------------------------------
------------------------------------------S-SEDCDKSAEGKLAKVAL
-TTISLWFMAWTPYLVICYFGLF--KI-DG-LTPLTTIWGATFAKTSAVYNPIVYGISHPK
-YRIVLKEKCPMC--VFGNTDEPKPDAPASDTETTSEAD----------------------
-------------------------------------------------------------
-----------------SKA-----------
+-----------MERSHLPE---TPFDLAHS--G-------------------PRFQ----
+--------------------------------------AQSSG------NGSVLDNVLPD
+MAHLVNPYWSRFAPMDPMMSKI-----LGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPA
+NLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSM
+CMIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLI-GWS-----AYVP
+EGNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQ
+AKKM------------------NVKSLRS-------------------------------
+------------------------------------------------------------
+-------------------SEDCDKSAEGKLAKVALTTISLWFMAWTPYLVICYFGLF-K
+I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLK-EKCPMC--VFGNT-DEP
+KP-DA-PASDTETTSEADS----KA-----------------------------------
+-----------------------------------------------------------
> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-MERSLLPEPPLAMALLGPRFE-----AQTGG------NRSVLDNVLP-------------
---------------------DMAP---LVNPHWSRFAPMDPTMSKI-----LGLFTLVIL
-IISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--
-LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMM
-AVFW-TIMPLI-GWS-----SYVPEGNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPL
-FMICYSYWFIIATVAAHEKAMRDQAKKMNVKSLR--------------------------
-------------------------------------------------------------
------------------------------------------S-SEDCDKSAENKLAKVAL
-TTISLWFMAWTPYLIICYFGLF--KI-DG-LTPLTTIWGATFAKTSAVYNPIVYGISHPN
-DRLVLKEKCPMC--VCGTTDEPKPDAPPSDTETTSEAE----------------------
-------------------------------------------------------------
-----------------SKD-----------
+-----------MERSLLPE---PPLAMALL--G-------------------PRFE----
+--------------------------------------AQTGG------NRSVLDNVLPD
+MAPLVNPHWSRFAPMDPTMSKI-----LGLFTLVILIISCCGNGVVVYIFGGTKSLRTPA
+NLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSM
+CMIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLI-GWS-----SYVP
+EGNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQ
+AKKM------------------NVKSLRS-------------------------------
+------------------------------------------------------------
+-------------------SEDCDKSAENKLAKVALTTISLWFMAWTPYLIICYFGLF-K
+I-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLK-EKCPMC--VCGTT-DEP
+KP-DA-PPSDTETTSEAES----KD-----------------------------------
+-----------------------------------------------------------
> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-M-----------IAVSGPSYE-----AFSYGGQARFNNQTVVDKVPP-------------
---------------------DMLH---LIDANWYQYPPLNPMWHGI-----LGFVIGMLG
-FVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--
-LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLF
-SLGW-TIAPMF-GWN-----RYVPEGNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPL
-FLIIYSYWFIIQAVAAHEKNMREQAKKMNVASLR--------------------------
-------------------------------------------------------------
------------------------------------------S-SENQNTSAECKLAKVAL
-MTISLWFMAWTPYLVINFSGIF--NL-VK-ISPLFTIWGSLFAKANAVYNPIVYGISHPK
-YRAALFAKFPSL--AC-AAEPSSDAVSTTSG-TTTVTDN---------------------
-------------------------------------------------------------
--------------EK-SNA-----------
+-------------------------MIAVS--G-------------------PSYE----
+--------------------------------------AFSYGGQARFNNQTVVDKVPPD
+MLHLIDANWYQYPPLNPMWHGI-----LGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPS
+NLFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTM
+TMIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMF-GWN-----RYVP
+EGNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQ
+AKKM------------------NVASLRS-------------------------------
+------------------------------------------------------------
+-------------------SENQNTSAECKLAKVALMTISLWFMAWTPYLVINFSGIF-N
+L-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALF-AKFPSL--AC-AA-EPS
+SD-AV-STTSG-TTTVTDN----EKSNA--------------------------------
+-----------------------------------------------------------
> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-M-------------ANQLSYS-----SLGWPYQP---NASVVDTMPK-------------
---------------------EMLY---MIHEHWYAFPPMNPLWYSI-----LGVAMIILG
-IICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--
-LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIW
-SGGW-TILPFF-GWS-----RYVPEGNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPL
-ITMIYCYFFIVHAVAEHEKQLREQAKKMNVASLR--------------------------
-------------------------------------------------------------
------------------------------------------ANADQQKQSAECRLAKVAM
-MTVGLWFMAWTPYLIISWAGVF--SSGTR-LTPLATIWGSVFAKANSCYNPIVYGISHPR
-YKAALYQRFPSL--ACGSGESGSDVKSEASA-TTTMEEK---------------------
-------------------------------------------------------------
--------------PKIPEA-----------
+---------------------------MAN--Q-------------------LSYS----
+--------------------------------------SLGWPYQP---NASVVDTMPKE
+MLYMIHEHWYAFPPMNPLWYSI-----LGVAMIILGIICVLGNGMVIYLMMTTKSLRTPT
+NLLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSM
+VMITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFF-GWS-----RYVP
+EGNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQ
+AKKM------------------NVASLRA-------------------------------
+------------------------------------------------------------
+------------------NADQQKQSAECRLAKVAMMTVGLWFMAWTPYLIISWAGVF-S
+SGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALY-QRFPSL--ACGSG-ESG
+SD-VK-SEASA-TTTMEEK----PKIPEA-------------------------------
+-----------------------------------------------------------
> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
-M-------------VESTTLV-----NQTWWY-----NPTV-------------------
----------------------------DIHPHWAKFDPIPDAVYYS-----VGIFIGVVG
-IIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--
-FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMW
-SIVW-SVGPVF-NWG-----AYVPEGILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPI
-IIIAFCYFNIVMSVSNHEKEMAAMAKRLNAKELR--------------------------
-------------------------------------------------------------
------------------------------------------K-AQ-AGASAEMKLAKISM
-VIITQFMLSWSPYAIIALLAQF--GPAEW-VTPYAAELPVLFAKASAIHNPIVYSVSHPK
-FREAIQTTFPWLLTCCQFDEKECEDANDAEE-EVVASER--GGESRDAAQMKEMMAMMQK
-MQAQQAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQ
-VEAPQGAPPQGVDNQAYQA-----------
+---------------------------MVE--S-------------------TTLV----
+--------------------------------------NQTWWY-----NPTV-------
+---DIHPHWAKFDPIPDAVYYS-----VGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPA
+NMFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTM
+AMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVF-NWG-----AYVP
+EGILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAM
+AKRL------------------NAKELRK-------------------------------
+------------------------------------------------------------
+-------------------AQ-AGASAEMKLAKISMVIITQFMLSWSPYAIIALLAQF-G
+PAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQ-TTFPWLLTCCQFD-EKE
+CE-DA-NDAEE-EVVASER--GGESRDAAQMKEMMAMMQKMQAQQAAYQP---PPPPQGY
+PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQAYQA
> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
-M-------------GRDLR-D-----NETWWY-----NPSI-------------------
----------------------------VVHPHWREFDQVPDAVYYS-----LGIFIGICG
-IIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--
-FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLW
-SVLW-AIGPIF-GWG-----AYTLEGVLCNCSFDYISRD--STTRSNILCMFILGFFGPI
-LIIFFCYFNIVMSVSNHEKEMAAMAKRLNAKELR--------------------------
-------------------------------------------------------------
------------------------------------------K-AQ-AGANAEMRLAKISI
-VIVSQFLLSWSPYAVVALLAQF--GPLEW-VTPYAAQLPVMFAKASAIHNPMIYSVSHPK
-FREAISQTFPWVLTCCQFDDKETEDDKDAET-EIPAGESSDAAPSADAAQMKEMMAMMQK
-MQQQQAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP-
------AAPPQGVDNQAYQA-----------
+----------------------------MG--R-------------------DLRD----
+--------------------------------------NETWWY-----NPSI-------
+---VVHPHWREFDQVPDAVYYS-----LGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPA
+NMFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTM
+AMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIF-GWG-----AYTL
+EGVLCNCSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAM
+AKRL------------------NAKELRK-------------------------------
+------------------------------------------------------------
+-------------------AQ-AGANAEMRLAKISIVIVSQFLLSWSPYAVVALLAQF-G
+PLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAIS-QTFPWVLTCCQFD-DKE
+TE-DD-KDAET-EIPAGESSDAAPSADAAQMKEMMAMMQKMQQQQAAYPPQGYAPPPQGY
+PPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP------AAPPQGVDNQAYQA
> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
-MMDVNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDG----GADPVAGSWAPHLLS---EVT
-ASPAPTWDAPPDNA------SGCGE----------QINYGRVEKVV-----IGSILTLIT
-LLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--
-FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLL
-SASI-TLPPLF-GWA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPM
-SVMLFMYYQIYKAARKSAAKHKF--------------PGFPRVEPDSVIALNG-------
----------------------------------------------IVKLQKE--------
-----------VEECAN------------------LSRLLKHERKNISIFKREQKAATTLG
-IIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRD
-LRTTYRSLL-----QCQYRNINRKLSAAGMHEALKLAERPERPEFVLQNADYCRKKGHDS
-------------------------------------------------------------
-------------------------------
+------------------------MMDVNS-SG----RPDLYGHLRSFLLPEVGRGLPDL
+SPDG------------GADPVAGSWAPHLLS---EVTASPA---PTW--DA------PPD
+NASGCGEQ----INYGRVEKVV-----IGSILTLITLLTIAGNCLVVISVCFVKKLRQPS
+NYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTL
+CVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVN
+DDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-
+-------------PGFPRVEPDSVIALNG-------------------------------
+---------------------IVKLQKE------------------VEECAN--------
+----------LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
+GTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYR-SLLQCQ--YRNIN-RKL
+SA-AGMHEALKLAERPERPEFVLQNADYCRKK----------------------------
+--------------------------------------------------GHDS-----
> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
-M------------------------------------------------PHLLSGFLEVT
-ASPAPTWDAPPDNV------SGCGE----------QINYGRVEKVV-----IGSILTLIT
-LLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--
-FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLL
-SASI-TLPPLF-GWA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPM
-SVMLFMYYQIYKAARKSAAKHKF--------------PGFPRVQPESVISLNG-------
----------------------------------------------VVKLQKE--------
-----------VEECAN------------------LSRLLKHERKNISIFKREQKAATTLG
-IIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRD
-LRPTSRSLL-----QCQYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT
-------------------------------------------------------------
-------------------------------
+------------------------------------------------------------
+-------------------------MPHLLSGFLEVTASPA---PTW--DA------PPD
+NVSGCGEQ----INYGRVEKVV-----IGSILTLITLLTIAGNCLVVISVSFVKKLRQPS
+NYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTL
+CVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVN
+DDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-
+-------------PGFPRVQPESVISLNG-------------------------------
+---------------------VVKLQKE------------------VEECAN--------
+----------LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
+GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSR-SLLQCQ--YRNIN-RKL
+SA-AGMHEALKLAERPERSEFVLQNSDHCGKK----------------------------
+--------------------------------------------------GHDT-----
> 31=p A47425 serotonin receptor 5HT-7 - rat
-M------------------------------------------------PHLLSGFLEVT
-ASPAPTWDAPPDNV------SGCGE----------QINYGRVEKVV-----IGSILTLIT
-LLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--
-FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLL
-SASI-TLPPLF-GWA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPM
-SVMLFMYYQIYKAARKSAAKHKF--------------PGFPRVQPESVISLNG-------
----------------------------------------------VVKLQKE--------
-----------VEECAN------------------LSRLLKHERKNISIFKREQKAATTLG
-IIVGAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRD
-LRTTYRSLL-----QCQYRNINRKLSAAGMHEALKLAERPERSEFVLQNSDHCGKKGHDT
-------------------------------------------------------------
-------------------------------
+------------------------------------------------------------
+-------------------------MPHLLSGFLEVTASPA---PTW--DA------PPD
+NVSGCGEQ----INYGRVEKVV-----IGSILTLITLLTIAGNCLVVISVSFVKKLRQPS
+NYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTL
+CVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVN
+DDKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF-
+-------------PGFPRVQPESVISLNG-------------------------------
+---------------------VVKLQKE------------------VEECAN--------
+----------LSRLLKHERKNISIFKREQKAATTLGIIVGAFTVCWLPFFLLSTARPFIC
+GTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYR-SLLQCQ--YRNIN-RKL
+SA-AGMHEALKLAERPERSEFVLQNSDHCGKK----------------------------
+--------------------------------------------------GHDT-----
> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-MDVLSPG-----------------------------------------------------
----------QGNNT------TSPPAPFETGGNTTGISDVTVSYQVI-----TSLLLGTLI
-FCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--
-LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLI
-GFLI-SIPPML-GWRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPL
-LLMLVLYGRIFRAARFRIRKTVKKVEKTGADTRHGASPAPQPKK-----SVNG--ESGSR
-NWRLGVESKAGGALCANGAVRQGDDGAAL--EVIEVHRVGNSKEHLPLPSEAGPTPCAPA
-S------------------FERKNERNA-------------EAKRKMALARERKTVKTLG
-IIMGTFILCWLPFFIVALVLPF-CESSCH-MPTLLGAIINWLGYSNSLLNPVIYAYFNKD
-FQNAFKKII-----KCKFCR--Q-------------------------------------
-------------------------------------------------------------
-------------------------------
+-------------------------MDVLS-PG---------------------------
+------------------------------------QGNNT--------TSPPAPFETGG
+NTTGIS-------DVTVSYQVI-----TSLLLGTLIFCAVLGNACVVAAIALERSLQNVA
+NYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHL
+CAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTPEDRSDPD
+---ACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKK
+VEKTGADTRHGASPAPQPKK-----SVNG--ESGSRNWRLGVESKAGGALCANGAVRQGD
+DGAAL--EVIEVHRVGNSKEHLPLPSEAGPTPCAPAS------------------FERKN
+ERNA-------------EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C
+ESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KIIKCK--FCRQ-----
+------------------------------------------------------------
+-----------------------------------------------------------
> 33=p A35181 serotonin receptor class 1A - rat
-MDVFSFG-----------------------------------------------------
----------QGNNT------TASQEPFGTGGNVTSISDVTFSYQVI-----TSLLLGTLI
-FCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--
-LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLI
-GFLI-SIPPML-GWRTPEDRSDPD---ACTISKDH----------GYTIYSTFGAFYIPL
-LLMLVLYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSSAPPPKK-----SLNG--QPGSG
-DWRRCAENRAVGTPCTNGAVRQGDDEATL--EVIEVHRVGNSKEHLPLPSESGSNSYAPA
-C------------------LERKNERNA-------------EAKRKMALARERKTVKTLG
-IIMGTFILCWLPFFIVALVLPF-CESSCH-MPALLGAIINWLGYSNSLLNPVIYAYFNKD
-FQNAFKKII-----KCKFCR--R-------------------------------------
-------------------------------------------------------------
-------------------------------
+-------------------------MDVFS-FG---------------------------
+------------------------------------QGNNT--------TASQEPFGTGG
+NVTSIS-------DVTFSYQVI-----TSLLLGTLIFCAVLGNACVVAAIALERSLQNVA
+NYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHL
+CAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTPEDRSDPD
+---ACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRK
+VEKKGAGTSLGTSSAPPPKK-----SLNG--QPGSGDWRRCAENRAVGTPCTNGAVRQGD
+DEATL--EVIEVHRVGNSKEHLPLPSESGSNSYAPAC------------------LERKN
+ERNA-------------EAKRKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPF-C
+ESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK-KIIKCK--FCRR-----
+------------------------------------------------------------
+-----------------------------------------------------------
> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRDNASANDT------SATNM---TDDRYWSLTVYSHEHLVL-----TSVILGLFV
-LCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--
-LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIV
-ALFI-SIPPLF-GWRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPM
-LVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDST
-TEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANG
-CAEEASIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLA
-IITGAFLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPE
-FRSAFQKIL-----FGKYRRGHR-------------------------------------
-------------------------------------------------------------
-------------------------------
+---------------MANFTFGDLALDVAR-MGGLASTP---SGLRS-----TGLTTPGL
+SPTGLVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---TDD
+RYWSLT-------VYSHEHLVL-----TSVILGLFVLCCIIGNCFVIAAVMLERSLHNVA
+NYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHL
+VAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDPNN--DPD
+KTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQ
+MTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKN
+RAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSND
+TPYS-------------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-V
+DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KILFGK--YRRGH-R--
+------------------------------------------------------------
+-----------------------------------------------------------
> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRDNASANDT------SATNM---TDDRYWSLTVYSHEHLVL-----TSVILGLFV
-LCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--
-LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIV
-ALFI-SIPPLF-GWRDPNN--DPDKTGTCIISQDK----------GYTIFSTVGAFYLPM
-LVMMIIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDST
-TEKKKRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANG
-CAEEASIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLA
-IITGAFLICWLPFFIIALIGPF-VDPE-G-IPPFARSFVLWLGYFNSLLNPIIYTIFSPE
-FRSAFQKIL-----FGKYRRGHR-------------------------------------
-------------------------------------------------------------
-------------------------------
+---------------MANFTFGDLALDVAR-MGGLASTP---SGLRS-----TGLTTPGL
+SPTGLVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM---TDD
+RYWSLT-------VYSHEHLVL-----TSVILGLFVLCCIIGNCFVIAAVMLERSLHNVA
+NYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHL
+VAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDPNN--DPD
+KTGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQ
+MTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKN
+RAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEEASIAMLERQCNNGKKISSND
+TPYS-------------RTREKLELKRERKAARTLAIITGAFLICWLPFFIIALIGPF-V
+DPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ-KILFGK--YRRGH-R--
+------------------------------------------------------------
+-----------------------------------------------------------
> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
-MEG-AEGQEELDWEAL--------------------------------------------
-YLRLPLQNCSWNSTGWEPNWNVTVV---PNTTWWQASAPFDTPAALVRAAAKAVVLGLLI
-LATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--
-LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTV
-SFFV-CIAQLL-GWKDPDWNQRVSEDLRCVVSQDV----------GYQIFATASSFYVPV
-LIILILYWRIYQTARKRIRR------RRGATARGGVGPPPVPAG----------------
------------------GALVAGGGSGGIAAAVVAV-----IGRPLPTISETTTTGFTNV
-SSNNTSPE--KQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLA
-IITGAFVACWLPFFVLAILVPT-CD--CE-VSPVLTSLSLWLGYFNSTLNPVIYTVFSPE
-FRHAFQRLL-----CGRRVRRRRA----------------------------------PQ
-------------------------------------------------------------
-------------------------------
+----------------MEGAEGQEELDWEA-LY---------------------LRLP--
+-----------------------------LQ---NCSWNSTGWEPNW--NVTVV---PNT
+TWWQAS-------APFDTPAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAA
+NNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHL
+VAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLL-GWKDPDWNQRVS
+EDLRCVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRR----
+--RRGATARGGVGPPPVPAG---------------------------------GALVAGG
+GSGGIAAAVVAV-----IGRPLPTISETTTTGFTNVSSNNTSPE--KQSCANGLEADPPT
+TGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLAIITGAFVACWLPFFVLAILVPT-C
+DCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQ-RLLCGR--RVRRR-RA-
+---------------PQ-------------------------------------------
+-----------------------------------------------------------
> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-MN----------------------GTEGDNFYVPFSNKTGLA--RSPYEYPQY-------
-------------------YLAEPW-------------------KYSALAAYMFFLILVGF
-PVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPT
-MCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC
--AAPPLV-GWS-----RYIPEGMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIF
-FCYGRLLCTVKEAAAAQQESA---------------------------------------
-------------------------------------------------------------
-------------------------------------------STQKAEKEVTRMVVLMVI
-GFLVCWVPYASVAFYIFT--HQGSD-FGATFMTLPAFFAKSSALYNPVIYILMNKQFRNC
-MITTL-----CCGKNPLGDDESG--AS-TSKTEVS-------------------------
-------------------------------------------------------------
--SVS-TSPVSPA---
+--------------------MNGTE-G---------------------DNFYVPFSNKTG
+--------------------------------LARSPYEYPQY-----------------
+-------YLAEPW---------KYSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLN
+YILLNLAMANLFMVLFG-FTVTMYTSMN-GYFV--FGPTMCSIEGFFATLGGEVALWSLV
+VLAIERYIVICKPMGNF-RFGNTHAIMGVAFTWIMALAC-AAPPLV-GWS-----RYIPE
+GMQCSCGPDYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTVKEAAAAQQESA
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------STQKAEKEVTRMVVLMVIGFL
+VCWVPYASVAFYIFT---HQGS-DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRNCMI
+TTLCC-----GKNPLGDDE--SGASTSK-TEVSSVS-TSPVSPA----------------
+------------------------------------------------------------
+------------------------
> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
-MN----------------------GTEGPNFYVPFSNITGVV--RSPFEQPQY-------
-------------------YLAEPW-------------------QFSMLAAYMFLLIVLGF
-PINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPT
-GCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC
--AAPPLV-GWS-----RYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF
-FCYGQLVFTVKEAAAQQQESA---------------------------------------
-------------------------------------------------------------
-------------------------------------------TTQKAEKEVTRMVIIMVI
-FFLICWLPYASVAMYIFT--HQGSN-FGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNC
-MLTSL-----CCGKNPLGDDEAS--AT-ASKTETS-------------------------
-------------------------------------------------------------
--QVA-PA--------
+--------------------MNGTE-G---------------------PNFYVPFSNITG
+--------------------------------VVRSPFEQPQY-----------------
+-------YLAEPW---------QFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLN
+YILLNLAVADLFMVFGG-FTTTLYTSLH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLV
+VLAIERYVVVCKPMSNF-RFGENHAIMGVAFTWVMALAC-AAPPLV-GWS-----RYIPE
+GMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTVKEAAAQQQESA
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------TTQKAEKEVTRMVIIMVIFFL
+ICWLPYASVAMYIFT---HQGS-NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRNCML
+TSLCC-----GKNPLGDDE--ASATASK-TETSQVA-PA---------------------
+------------------------------------------------------------
+------------------------
> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
-MN----------------------GTEGINFYVPMSNKTGVV--RSPFEYPQY-------
-------------------YLAEPW-------------------KYRLVCCYIFFLISTGL
-PINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPV
-GCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC
--AAPPLF-GWS-----RYMPEGMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIF
-FSYGRLICKVREAAAQQQESA---------------------------------------
-------------------------------------------------------------
-------------------------------------------TTQKAEKEVTRMVILMVL
-GFMLAWTPYAVVAFWIFT--NKGAD-FTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNC
-MITTI-----CCGKNPFGDEDVSSTVS-QSKTEVS-------------------------
-------------------------------------------------------------
--SVS-SSQVSPA---
+--------------------MNGTE-G---------------------INFYVPMSNKTG
+--------------------------------VVRSPFEYPQY-----------------
+-------YLAEPW---------KYRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLN
+YILVNLAVADLFMACFG-FTVTFYTAWN-GYFV--FGPVGCAVEGFFATLGGQVALWSLV
+VLAIERYIVVCKPMGNF-RFSATHAMMGIAFTWVMAFSC-AAPPLF-GWS-----RYMPE
+GMQCSCGPDYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKVREAAAQQQESA
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------TTQKAEKEVTRMVILMVLGFM
+LAWTPYAVVAFWIFT---NKGA-DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRNCMI
+TTICC-----GKNPFGDEDVSSTVSQSK-TEVSSVS-SSQVSPA----------------
+------------------------------------------------------------
+------------------------
> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-MN----------------------GTEGKNFYVPMSNRTGLV--RSPFEYPQY-------
-------------------YLAEPW-------------------QFKILALYLFFLMSMGL
-PINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPT
-GCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC
--AAPPLF-GWS-----RYIPEGMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIF
-FTYGRLVCTVKAAAAQQQDSA---------------------------------------
-------------------------------------------------------------
-------------------------------------------STQKAEREVTKMVILMVF
-GFLIAWTPYATVAAWIFF--NKGAD-FSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNC
-MLTTI-----FCGKNPLGDDESS-TVS-TSKTEVS-------------------------
-------------------------------------------------------------
--SVS-PA--------
+--------------------MNGTE-G---------------------KNFYVPMSNRTG
+--------------------------------LVRSPFEYPQY-----------------
+-------YLAEPW---------QFKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLN
+FILVNLAVAGTIMVCFG-FTVTFYTAIN-GYFV--LGPTGCAVEGFMATLGGEVALWSLV
+VLAIERYIVVCKPMGSF-KFSSSHAFAGIAFTWVMALAC-AAPPLF-GWS-----RYIPE
+GMQCSCGPDYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTVKAAAAQQQDSA
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------STQKAEREVTKMVILMVFGFL
+IAWTPYATVAAWIFF---NKGA-DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRNCML
+TTIFC-----GKNPLGDDE-SSTVSTSK-TEVSSVS-PA---------------------
+------------------------------------------------------------
+------------------------
> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-MN----------------------GTEGNNFYVPLSNRTGLV--RSPFEYPQY-------
-------------------YLAEPW-------------------QFKLLAVYMFFLICLGL
-PINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPT
-GCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC
--AAPPLV-GWS-----RYIPEGIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIF
-FTYGRLVCTVKAAAAQQQDSA---------------------------------------
-------------------------------------------------------------
-------------------------------------------STQKAEREVTKMVILMVL
-GFLVAWTPYATVAAWIFF--NKGAA-FSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSC
-MLTTL-----FCGKNPLGDEESS-TVS-TSKTEVS-------------------------
-------------------------------------------------------------
--SVS-PA--------
+--------------------MNGTE-G---------------------NNFYVPLSNRTG
+--------------------------------LVRSPFEYPQY-----------------
+-------YLAEPW---------QFKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLN
+FILVNLAVAGAIMVCFG-FTVTFYTAIN-GYFA--LGPTGCAVEGFMATLGGEVALWSLV
+VLAIERYIVVCKPMGSF-KFSSTHASAGIAFTWVMAMAC-AAPPLV-GWS-----RYIPE
+GIQCSCGPDYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTVKAAAAQQQDSA
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------STQKAEREVTKMVILMVLGFL
+VAWTPYATVAAWIFF---NKGA-AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRSCML
+TTLFC-----GKNPLGDEE-SSTVSTSK-TEVSSVS-PA---------------------
+------------------------------------------------------------
+------------------------
> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
-MK--------------------QVPEFHEDFYIPIPLDINNLSAYSPFLVPQD-------
-------------------HLGNQG-------------------IFMAMSVFMFFIFIGGA
-SINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGAT
-ACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA
--SLPPLF-GWS-----RYIPEGLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIV
-FCYGQLLITLKLAAKAQADSA---------------------------------------
-------------------------------------------------------------
-------------------------------------------STQKAEREVTKMVVVMVL
-GFLVCWAPYASFSLWIVS--HRGEE-FDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSC
-MMKMV------CGKN-IEEDEAS--TS-SQVTQVS-------------------------
-------------------------------------------------------------
--SVA-PEK-------
+--------------------MKQVP-E-------------------FHEDFYIPIPLDIN
+------------------------------NLSAYSPFLVPQD-----------------
+-------HLGNQG---------IFMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLN
+YILVNLSIANLFVAIFG-SPLSFYSFFN-RYFI--FGATACKIEGFLATLGGMVGLWSLA
+VVAFERWLVICKPLGNF-TFKTPHAIAGCILPWISALAA-SLPPLF-GWS-----RYIPE
+GLQCSCGPDWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITLKLAAKAQADSA
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------STQKAEREVTKMVVVMVLGFL
+VCWAPYASFSLWIVS---HRGE-EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRSCMM
+KMVC------GKN-IEEDE--ASTSSQV-TQVSSVA-PEK--------------------
+------------------------------------------------------------
+------------------------
> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-MR----------------------KMSEEEFYL-----FKNISSVGPWDGPQY-------
-------------------HIAPVW-------------------AFYLQAAFMGTVFLIGF
-PLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRH
-VCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV
--SIPPFF-GWS-----RFIPEGLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLIC
-FSYTQLLRALKAVAAQQQESA---------------------------------------
-------------------------------------------------------------
-------------------------------------------TTQKAEREVSRMVVVMVG
-SFCVCYVPYAAFAMYMVN--NRNHG-LDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQAC
-IMKMV------CGKA---MTDESDTCS-SQKTEVS-------------------------
-------------------------------------------------------------
--TVS-STQVGPN---
+--------------------MRKMS-E---------------------EEFYL-----FK
+------------------------------NISSVGPWDGPQY-----------------
+-------HIAPVW---------AFYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLN
+YILVNVSFGGFLLCIFS-VFPVFVASCN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLA
+FLAFERYIVICKPFGNF-RFSSKHALTVVLATWTIGIGV-SIPPFF-GWS-----RFIPE
+GLQCSCGPDWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRALKAVAAQQQESA
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------TTQKAEREVSRMVVVMVGSFC
+VCYVPYAAFAMYMVN---NRNH-GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQACIM
+KMVC------GKA-MTDES--DTCSSQK-TEVSTVS-STQVGPN----------------
+------------------------------------------------------------
+------------------------
> 8=opsin, greensensitive human (fragment) S07060
------------------------------------------------------------
------------------------------------------------------------
-------------------------------DLAETVIA-STISIVNQVS-GYFV--LGHP
-MCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW
--TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIV
-LCYLQVWLAIRAVAKQQKESE---------------------------------------
------------------------------------------------------------
-------------------------------------------STQKAEKEVTRMVVVMVL
-AFC---------------------------------------------------------
+---------DLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLA
+IISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWPH
+GLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE
------------------------------------------------------------
------------------------------------------------------------
----------------
+---------------------------------------STQKAEKEVTRMVVVMVLAFC
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------------------------------------------
+------------------------
> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-MAQQWSLQRLAGRHPQDS-YEDSTQ--SSIFTY-----TNSNSTRGPFEGPNY-------
-------------------HIAPRW-------------------VYHLTSVWMIFVVIASV
-FTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISVVNQVY-GYFV--LGHP
-MCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW
--TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIV
-LCYLQVWLAIRAVAKQQKESE---------------------------------------
-------------------------------------------------------------
-------------------------------------------STQKAEKEVTRMVVVMVL
-AFCFCWGPYAFFACFAAA--NPGYP-FHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNC
-ILQLF----------GKKVDDGSELSS-ASKTEVS-------------------------
-------------------------------------------------------------
--SVS---SVSPA---
+MAQQWS-LQRLAGRHPQDSYEDSTQ-S---------------------SIFTYTNSN---
+--------------------------------STRGPFEGPNY-----------------
+-------HIAPRW---------VYHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLN
+WILVNLAVADLAETVIA-STISVVNQVY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLA
+IISWERWMVVCKPFGNV-RFDAKLAIVGIAFSWIWAAVW-TAPPIF-GWS-----RYWPH
+GLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAIRAVAKQQKESE
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------STQKAEKEVTRMVVVMVLAFC
+FCWGPYAFFACFAAA---NPGY-PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRNCIL
+QLF-------GKK-VDDGS--ELSSASK-TEVSSV---SSVSPA----------------
+------------------------------------------------------------
+------------------------
> 10== Z68193 1 human Red Opsin <>[]
-MAQQWSLQRLAGRHPQDS-YEDSTQ--SSIFTY-----TNSNSTRGPFEGPNY-------
-------------------HIAPRW-------------------VYHLTSVWMIFVVTASV
-FTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-STISIVNQVS-GYFV--LGHP
-MCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW
--TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIM
-LCYLQVWLAIRAVAKQQKESE---------------------------------------
-------------------------------------------------------------
-------------------------------------------STQKAEKEVTRMVVVMIF
-AYCVCWGPYTFFACFAAA--NPGYA-FHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNC
-ILQLF----------GKKVDDGSELSS-ASKTEVS-------------------------
-------------------------------------------------------------
--SVS---SVSPA---
+MAQQWS-LQRLAGRHPQDSYEDSTQ-S---------------------SIFTYTNSN---
+--------------------------------STRGPFEGPNY-----------------
+-------HIAPRW---------VYHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLN
+WILVNLAVADLAETVIA-STISIVNQVS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLA
+IISWERWLVVCKPFGNV-RFDAKLAIVGIAFSWIWSAVW-TAPPIF-GWS-----RYWPH
+GLKTSCGPDVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAIRAVAKQQKESE
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------STQKAEKEVTRMVVVMIFAYC
+VCWGPYTFFACFAAA---NPGY-AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRNCIL
+QLF-------GKK-VDDGS--ELSSASK-TEVSSV---SSVSPA----------------
+------------------------------------------------------------
+------------------------
> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
-MTEAWNVAVFAARRSRDD--DDTTR--GSVFTY-----TNTNNTRGPFEGPNY-------
-------------------HIAPRW-------------------VYNLVSFFMIIVVIASC
-FTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHP
-LCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW
--SAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIII
-VCYLQVWMAIRAVAAQQKESE---------------------------------------
-------------------------------------------------------------
-------------------------------------------STQKAEREVSRMVVVMIV
-AFCICWGPYASFVSFAAA--NPGYA-FHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNC
-IMQLF----------GKKVDDGSEAST-TSRTEVS-------------------------
-------------------------------------------------------------
--SVS-NSSVAPA---
+MTEAWNVAVFAARRSRDD--DDTTR-G---------------------SVFTYTNTN---
+--------------------------------NTRGPFEGPNY-----------------
+-------HIAPRW---------VYNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLN
+WILVNLAFVDLVETLVA-STISVFNQIF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLA
+IISWERWFVVCKPFGNI-KFDSKLAIIGIVFSWVWAWGW-SAPPIF-GWS-----RYWPH
+GLKTSCGPDVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAIRAVAAQQKESE
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------STQKAEREVSRMVVVMIVAFC
+ICWGPYASFVSFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRNCIM
+QLF-------GKK-VDDGS--EASTTSR-TEVSSVS-NSSVAPA----------------
+------------------------------------------------------------
+------------------------
> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
-MA-AWE-AAFAARRRHEE--EDTTR--DSVFTY-----TNSNNTRGPFEGPNY-------
-------------------HIAPRW-------------------VYNLTSVWMIFVVAASV
-FTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-STISVINQIS-GYFI--LGHP
-MCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW
--TAPPIF-GWS-----RYWPHGLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIII
-LCYLQVWLAIRAVAAQQKESE---------------------------------------
-------------------------------------------------------------
-------------------------------------------STQKAEKEVSRMVVVMIV
-AYCFCWGPYTFFACFAAA--NPGYA-FHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNC
-ILQLF----------GKKVDDGSEVST--SRTEVS-------------------------
-------------------------------------------------------------
--SVS-NSSVSPA---
+MA-AWE-AAFAARRRHEE--EDTTR-D---------------------SVFTYTNSN---
+--------------------------------NTRGPFEGPNY-----------------
+-------HIAPRW---------VYNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLN
+WILVNLAVADLGETVIA-STISVINQIS-GYFI--LGHPMCVVEGYTVSACGITALWSLA
+IISWERWFVVCKPFGNI-KFDGKLAVAGILFSWLWSCAW-TAPPIF-GWS-----RYWPH
+GLKTSCGPDVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAIRAVAAQQKESE
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------STQKAEKEVSRMVVVMIVAYC
+FCWGPYTFFACFAAA---NPGY-AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRNCIL
+QLF-------GKK-VDDGS--EVST-SR-TEVSSVS-NSSVSPA----------------
+------------------------------------------------------------
+------------------------
> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-MS-------------------------------SNSSQAPPNGTPGPFDGPQWP------
-------------------YQAPQS-------------------TYVGVAVLMGTVVACAS
-VVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRR
-MCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW
--SAPPLL-GWS-----SYVPEGLRTSCGPNWYTGGS--NNNSYILSLFVTCFVLPLSLIL
-FSYTNLLLTLRAAAAQQKEAD---------------------------------------
-------------------------------------------------------------
-------------------------------------------TTQRAEREVTRMVIVMVM
-AFLLCWLPYSTFALVVAT--HKGII-IQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSC
-LLEML-----CCGYQPQRTGKASPGTP-GPHADVT-------------------------
-------------------------------------------------------------
--AAGLRNKVMPAHPV
+------------------------M-S---------------------SNSSQAPPN---
+--------------------------------GTPGPFDGPQW-----------------
+------PYQAPQS---------TYVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLN
+YILVNLAVADLLVTLCG-SSVSLSNNIN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLA
+ILALERYVVVCKPLGDF-QFQRRHAVSGCAFTWGWALLW-SAPPLL-GWS-----SYVPE
+GLRTSCGPNWYTGGSN--NNSYILSLFVTCFVLPLSLILFSYTNLLLTLRAAAAQQKEAD
+------------------------------------------------------------
+------------------------------------------------------------
+---------------------------------------TTQRAEREVTRMVIVMVMAFL
+LCWLPYSTFALVVAT---HKGI-IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQSCLL
+EMLCCG--YQPQR-TGKAS--PGTPGPH-ADVTAAGLRNKVMPAHP----V---------
+------------------------------------------------------------
+------------------------
> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-MESGNVSSSLFGNVSTALRPEARLSAE----------TRLLGWNVPPEELR---------
-------------------HIPEHWLTYP----------EPPESMNYLLGTLYIFFTLMSM
-LGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--TPIFIYNSFH-QGYA--LGHL
-GCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW
--VVACYTETWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMIT
-YYYSQIVGHVFSHEKALRDQAKKMNVESLR------------------------------
-------------------------------------------------------------
--------------------------------------SNVDKNKETAEIRIAKAAITICF
-LFFCSWTPYGVMSLIGAF--GDKTL-LTPGATMIPACACKMVACIDPFVYAISHPRYRME
-LQKRCPWL--ALNEKAPESSAVASTST-TQEPQQT-------------------------
-------------------------------------------------------------
--TAA-----------
+--------------------MESGNVS---------------------SSLF--------
+--------------------------------GNVSTALRPEA--RLSA---ETRLLGWN
+VPPEELRHIPEHWLTYPEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSN
+ILVINLAFCDFMMMVK--TPIFIYNSFH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNA
+FIAYDRFNVITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETWG-----RFVPE
+GYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQA
+KKMNVESL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RSNVDKNKETAEIRIAKAAITICFLFF
+CSWTPYGVMSLIGAF---GDKT-LLTPGATMIPACACKMVACIDPFVYAISHPRYRMELQ
+KRCPWL--ALNEK-APESS--AVASTST-TQEPQQT---------------TAA------
+------------------------------------------------------------
+------------------------
> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-MEYHNVSSVL-GNVSSVLRPDARLSAE----------SRLLGWNVPPDELR---------
-------------------HIPEHWLIYP----------EPPESMNYLLGTLYIFFTVISM
-IGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--TPIFIYNSFH-QGYA--LGHL
-GCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW
--VVACYTESWG-----RFVPEGYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMIT
-YYYSQIVGHVFSHEKALRDQAKKMNVDSLR------------------------------
-------------------------------------------------------------
--------------------------------------SNVDKSKEAAEIRIAKAAITICF
-LFFASWTPYGVMSLIGAF--GDKTL-LTPGATMIPACTCKMVACIDPFVYAISHPRYRME
-LQKRCPWL--AISEKAPESRAAISTST-TQEQQQT-------------------------
-------------------------------------------------------------
--TAA-----------
+--------------------MEYHNVS---------------------SVL---------
+--------------------------------GNVSSVLRPDA--RLSA---ESRLLGWN
+VPPDELRHIPEHWLIYPEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSN
+ILVINLAFCDFMMMIK--TPIFIYNSFH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNA
+FIAYDRYNVITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESWG-----RFVPE
+GYLTSCTFDYLTDN--FDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQA
+KKMNVDSL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RSNVDKSKEAAEIRIAKAAITICFLFF
+ASWTPYGVMSLIGAF---GDKT-LLTPGATMIPACTCKMVACIDPFVYAISHPRYRMELQ
+KRCPWL--AISEK-APESR--AAISTST-TQEQQQT---------------TAA------
+------------------------------------------------------------
+------------------------
> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-ME------PLCNASEPPLRPEAR-SSGNGD-------LQFLGWNVPPDQIQ---------
-------------------YIPEHWLTQL----------EPPASMHYMLGVFYIFLFCAST
-VGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIF--NSFH-RGFAIYLGNT
-WCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW
--VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMIL
-YYYSQIVGHVFSHEKALREQAKKMNVESLR------------------------------
-------------------------------------------------------------
--------------------------------------SNVDKSKETAEIRIAKAAITICF
-LFFVSWTPYGVMSLIGAF--GDKSL-LTQGATMIPACTCKLVACIDPFVYAISHPRYRLE
-LQKRCPWL--GVNEKSGEISSAQSTTT-QEQQQTT-------------------------
-------------------------------------------------------------
--AA------------
+--------------------MEPLC-----------------------------------
+--------------------------------NASEPPLRPEA--R-SSGNGDLQFLGWN
+VPPDQIQYIPEHWLTQLEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSN
+MFVLNLAVFDLIMCLK--APIF--NSFH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNA
+AIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPE
+GYLTSCSFDYLSDN--FDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQA
+KKMNVESL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RSNVDKSKETAEIRIAKAAITICFLFF
+VSWTPYGVMSLIGAF---GDKS-LLTQGATMIPACTCKLVACIDPFVYAISHPRYRLELQ
+KRCPWL--GVNEK-SGEIS--SAQSTTT-QEQQQTT---------------AA-------
+------------------------------------------------------------
+------------------------
> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-MD------ALCNASEPPLRPEARMSSGSDE-------LQFLGWNVPPDQIQ---------
-------------------YIPEHWLTQL----------EPPASMHYMLGVFYIFLFFAST
-LGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--APIFIYNSFH-RGFA--LGNT
-WCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW
--VVLPLTQFWD-----RFVPEGYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMIL
-YYYSQIVGHVFNHEKALREQAKKMNVESLR------------------------------
-------------------------------------------------------------
--------------------------------------SNVDKSKETAEIRIAKAAITICF
-LFFVSWTPYGVMSLIGAF--GDKSL-LTPGATMIPACTCKLVACIEPFVYAISHPRYRME
-LQKRCPWL--GVNEKSGEASSAQSTTT-QEQTQQT-------------------------
-------------------------------------------------------------
--SAA-----------
+--------------------MDALC-----------------------------------
+--------------------------------NASEPPLRPEA--RMSSGSDELQFLGWN
+VPPDQIQYIPEHWLTQLEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSN
+MFVLNLAVFDLIMCLK--APIFIYNSFH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNA
+AIGYDRYNVITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFWD-----RFVPE
+GYLTSCSFDYLSDN--FDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQA
+KKMNVESL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RSNVDKSKETAEIRIAKAAITICFLFF
+VSWTPYGVMSLIGAF---GDKS-LLTPGATMIPACTCKLVACIEPFVYAISHPRYRMELQ
+KRCPWL--GVNEK-SGEAS--SAQSTTT-QEQTQQT---------------SAA------
+------------------------------------------------------------
+------------------------
> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
-M-----------TNATGPQMAYYGAASMDFGYPE---GVSIVDFVRPEIKP---------
-------------------YVHQHWYNYP----------PVNPMWHYLLGVIYLFLGTVSI
-FGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQ
-YCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC
--ALPPFF-GWG-----NYILEGILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIV
-FSYVFIVKAIFAHEAAMRAQAKKMNVSTLR------------------------------
-------------------------------------------------------------
--------------------------------------S-NEADAQRAEIRIAKTALVNVS
-LWFICWTPYALISLKGVM--GDTSG-ITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLA
-ITQHLPWF--CVHETETKSNDDSQSNS-TVAQDKA-------------------------
-------------------------------------------------------------
----------------
+--------------------MTNAT-G---------------------PQMAY-----YG
+--------------------------------AASMDFGYPE---GVSIVD--------F
+VRPEIKPYVHQHWYNYPPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPAN
+ILVVNLALSDLIMLTTN-VPFFTYNCFSGGVWM--FSPQYCEIYACLGAITGVCSIWLLC
+MISFDRYNIICNGFNGP-KLTTGKAVVFALISWVIAIGC-ALPPFF-GWG-----NYILE
+GILDSCSYDYLTQD--FNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQA
+KKMNVSTL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RS-NEADAQRAEIRIAKTALVNVSLWF
+ICWTPYALISLKGVM---GDTS-GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRLAIT
+QHLPWF--CVHET-ETKSN--DDSQSNS-TVAQDKA------------------------
+------------------------------------------------------------
+------------------------
> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
-M-----------ANVTGPQMAFYGSGAATFGYPE---GMTVADFVPDRVKH---------
-------------------MVLDHWYNYP----------PVNPMWHYLLGVVYLFLGVISI
-AGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGT
-YCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW
--SLPPFF-GWG-----SYTLEGILDSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIV
-FSYVFIVKAIFAHEAAMRAQAKKMNVTNLR------------------------------
-------------------------------------------------------------
--------------------------------------S-NEAETQRAEIRIAKTALVNVS
-LWFICWTPYAAITIQGLL--GNAEG-ITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLA
-ITQHLPWF--CVHEKDPNDVEENQSSN-TQTQEKS-------------------------
-------------------------------------------------------------
----------------
+--------------------MANVT-G---------------------PQMAF-----YG
+--------------------------------SGAATFGYPE---GMTVAD--------F
+VPDRVKHMVLDHWYNYPPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPAN
+MLIVNLALSDLIMLTTN-FPPFCYNCFSGGRWM--FSGTYCEIYAALGAITGVCSIWTLC
+MISFDRYNIICNGFNGP-KLTQGKATFMCGLAWVISVGW-SLPPFF-GWG-----SYTLE
+GILDSCSYDYFTRD--MNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQA
+KKMNVTNL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RS-NEAETQRAEIRIAKTALVNVSLWF
+ICWTPYAAITIQGLL---GNAE-GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRLAIT
+QHLPWF--CVHEK-DPNDV--EENQSSN-TQTQEKS------------------------
+------------------------------------------------------------
+------------------------
> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
-ME-----SFAVAAAQLGPHFAPLS-------------NGSVVDKVTPDMAH---------
-------------------LISPYWNQFP----------AMDPIWAKILTAYMIMIGMISW
-CGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPM
-MCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKM----------
------------------YVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLIC
-YSYWFIIAAVSAHEKAMREQAKKMNVKSLR------------------------------
-------------------------------------------------------------
--------------------------------------S-SEDAEKSAEGKLAKVALVTIT
-LWFMAWTPYLVINCMGLF---KFEG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLA
-LKEKCPCC--VFGKVDDGKSSDAQSQA-TASEAESKA-----------------------
-------------------------------------------------------------
----------------
+MES--------------FAVAAAQL-G---------------------PHFA--------
+----------------------------------PLS--------NGSVVD--------K
+VTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPAN
+LLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMC
+MISLDRYQVIVKGMAGR-PMTIPLALGKM---------------------------YVPE
+GNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQA
+KKMNVKSL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RS-SEDAEKSAEGKLAKVALVTITLWF
+MAWTPYLVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK
+EKCPCC--VFGKV-DDGKS--SDAQSQA-TASEAES------KA----------------
+------------------------------------------------------------
+------------------------
> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
-ME-----SFAVAAAQLGPHFAPLS-------------NGSVVDKVTPDMAH---------
-------------------LISPYWNQFP----------AMDPIWAKILTAYMIMIGMISW
-CGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPM
-MCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW
--CLAPAF-GWS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLIC
-YSYWFIIAAVSAHEKAMREQAKKMNVKSLR------------------------------
-------------------------------------------------------------
--------------------------------------S-SEDAEKSAEGKLAKVALVTIT
-LWFMAWTPYLVINCMGLF---KFEG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLA
-LKEKCPCC--VFGKVDDGKSSDAQSQA-TASEAESKA-----------------------
-------------------------------------------------------------
----------------
+MES--------------FAVAAAQL-G---------------------PHFA--------
+----------------------------------PLS--------NGSVVD--------K
+VTPDMAHLISPYWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPAN
+LLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMC
+MISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSSIW-CLAPAF-GWS-----RYVPE
+GNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQA
+KKMNVKSL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RS-SEDAEKSAEGKLAKVALVTITLWF
+MAWTPYLVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK
+EKCPCC--VFGKV-DDGKS--SDAQSQA-TASEAES------KA----------------
+------------------------------------------------------------
+------------------------
> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
-MD-----SFAAVATQLGPQFAAPS-------------NGSVVDKVTPDMAH---------
-------------------LISPYWDQFP----------AMDPIWAKILTAYMIIIGMISW
-CGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPM
-MCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIW
-CCLAPVF-GWS-----RYVPEGNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLIC
-YSYWFIIAAVSAHEKAMREQAKKMNVKSLR------------------------------
-------------------------------------------------------------
--------------------------------------S-SEDADKSAEGKLAKVALVTIS
-LWFMAWTPYLVINCMGLF---KFEG-LTPLNTIWGACFAKSAACYNPIVYGISHPKYRLA
-LKEKCPCC--VFGKVDDGKSSEAQSQA-TTSEAESKA-----------------------
-------------------------------------------------------------
----------------
+MDS--------------FAAVATQL-G---------------------PQFA--------
+----------------------------------APS--------NGSVVD--------K
+VTPDMAHLISPYWDQFPAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPAN
+LLVINLAISDFGIMITN-TPMMGINLYF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMC
+MISLDRYQVIVKGMAGR-PMTIPLALGKIAYIWFMSTIWCCLAPVF-GWS-----RYVPE
+GNLTSCGIDYLERD--WNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQA
+KKMNVKSL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RS-SEDADKSAEGKLAKVALVTISLWF
+MAWTPYLVINCMGLF---KF-E-GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRLALK
+EKCPCC--VFGKV-DDGKS--SEAQSQA-TTSEAES------KA----------------
+------------------------------------------------------------
+------------------------
> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-MERSHLPETPFDLAHSGPRFQAQSSG-----------NGSVLDNVLPDMAH---------
-------------------LVNPYWSRFA----------PMDPMMSKILGLFTLAIMIISC
-CGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPL
-WCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW
--TVMPLI-GWS-----AYVPEGNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLIC
-YSYWFIIAAVAAHEKAMREQAKKMNVKSLR------------------------------
-------------------------------------------------------------
--------------------------------------S-SEDCDKSAEGKLAKVALTTIS
-LWFMAWTPYLVICYFGLF---KIDG-LTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIV
-LKEKCPMC--VFGNTDEPKPDAPASDTETTSEADSKA-----------------------
-------------------------------------------------------------
----------------
+MERSHL---------PETPFDLAHS-G---------------------PRFQ--------
+----------------------------------AQSSG------NGSVLD--------N
+VLPDMAHLVNPYWSRFAPMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPAN
+LLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMC
+MIAFDRYNVIVKGINGT-PMTIKTSIMKILFIWMMAVFW-TVMPLI-GWS-----AYVPE
+GNLTACSIDYMTRM--WNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQA
+KKMNVKSL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RS-SEDCDKSAEGKLAKVALTTISLWF
+MAWTPYLVICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRIVLK
+EKCPMC--VFGNT-DEPKP--DAPASDTETTSEADS------KA----------------
+------------------------------------------------------------
+------------------------
> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-MERSLLPEPPLAMALLGPRFEAQTGG-----------NRSVLDNVLPDMAP---------
-------------------LVNPHWSRFA----------PMDPTMSKILGLFTLVILIISC
-CGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPL
-WCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW
--TIMPLI-GWS-----SYVPEGNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMIC
-YSYWFIIATVAAHEKAMRDQAKKMNVKSLR------------------------------
-------------------------------------------------------------
--------------------------------------S-SEDCDKSAENKLAKVALTTIS
-LWFMAWTPYLIICYFGLF---KIDG-LTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLV
-LKEKCPMC--VCGTTDEPKPDAPPSDTETTSEAESKD-----------------------
-------------------------------------------------------------
----------------
+MERSLL---------PEPPLAMALL-G---------------------PRFE--------
+----------------------------------AQTGG------NRSVLD--------N
+VLPDMAPLVNPHWSRFAPMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPAN
+LLVLNLAFSDFCMMASQ-SPVMIINFYY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMC
+MIAFDRYNVIVKGINGT-PMTIKTSIMKIAFIWMMAVFW-TIMPLI-GWS-----SYVPE
+GNLTACSIDYMTRQ--WNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQA
+KKMNVKSL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RS-SEDCDKSAENKLAKVALTTISLWF
+MAWTPYLIICYFGLF---KI-D-GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRLVLK
+EKCPMC--VCGTT-DEPKP--DAPPSDTETTSEAES------KD----------------
+------------------------------------------------------------
+------------------------
> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-M-----------IAVSGPSYEAFSYGGQARFN-----NQTVVDKVPPDMLH---------
-------------------LIDANWYQYP----------PLNPMWHGILGFVIGMLGFVSA
-MGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPL
-FCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW
--TIAPMF-GWN-----RYVPEGNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLII
-YSYWFIIQAVAAHEKNMREQAKKMNVASLR------------------------------
-------------------------------------------------------------
--------------------------------------S-SENQNTSAECKLAKVALMTIS
-LWFMAWTPYLVINFSGIF---NLVK-ISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAA
-LFAKFPSL--ACAA-EPSSDAVSTTSG-TTTVTDNEKSNA--------------------
-------------------------------------------------------------
----------------
+--------------------MIAVS-G---------------------PSYE--------
+----------------------------------AFSYGGQARFNNQTVVD--------K
+VPPDMLHLIDANWYQYPPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSN
+LFVINLAISNFLMMFCM-SPPMVINCYY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMT
+MIAFDRYNVIVKGLSGK-PLSINGALIRIIAIWLFSLGW-TIAPMF-GWN-----RYVPE
+GNMTACGTDYFNRG--LLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQA
+KKMNVASL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RS-SENQNTSAECKLAKVALMTISLWF
+MAWTPYLVINFSGIF---NL-V-KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRAALF
+AKFPSL--AC-AA-EPSSD--AVSTTSG-TTTVTDN------EKS------NA-------
+------------------------------------------------------------
+------------------------
> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-M------ANQLSYSSLGWPYQP---------------NASVVDTMPKEMLY---------
-------------------MIHEHWYAFP----------PMNPLWYSILGVAMIILGIICV
-LGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPF
-MCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW
--TILPFF-GWS-----RYVPEGNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMI
-YCYFFIVHAVAEHEKQLREQAKKMNVASLR------------------------------
-------------------------------------------------------------
--------------------------------------ANADQQKQSAECRLAKVAMMTVG
-LWFMAWTPYLIISWAGVF--SSGTR-LTPLATIWGSVFAKANSCYNPIVYGISHPRYKAA
-LYQRFPSL--ACGSGESGSDVKSEASA-TTTMEEKPKIPEA-------------------
-------------------------------------------------------------
----------------
+----------------------MAN-Q---------------------LSYS--------
+----------------------------------SLGWPYQP---NASVVD--------T
+MPKEMLYMIHEHWYAFPPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTN
+LLVVNLAFSDFCMMAFM-MPTMTSNCFA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMV
+MITLDRYNVIVRGMAAA-PLTHKKATLLLLFVWIWSGGW-TILPFF-GWS-----RYVPE
+GNLTSCTVDYLTKD--WSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQA
+KKMNVASL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RANADQQKQSAECRLAKVAMMTVGLWF
+MAWTPYLIISWAGVF---SSGT-RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKAALY
+QRFPSL--ACGSG-ESGSD--VKSEASA-TTTMEEK------PKI------PEA------
+------------------------------------------------------------
+------------------------
> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
-M-------------------------------VESTTLVNQTWWYNPTV-----------
-------------------DIHPHWAKFD----------PIPDAVYYSVGIFIGVVGIIGI
-LGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKV
-ACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW
--SVGPVF-NWG-----AYVPEGILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIA
-FCYFNIVMSVSNHEKEMAAMAKRLNAKELR------------------------------
-------------------------------------------------------------
----------------------------------------KAQAGASAEMKLAKISMVIIT
-QFMLSWSPYAIIALLAQF--GPAEW-VTPYAAELPVLFAKASAIHNPIVYSVSHPKFREA
-IQTTFPWLLTCCQFDEKECEDANDAEE-EVVASER--GGESRDAAQMKEMMAMMQKMQAQ
-QAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAP
-QGAPPQGVDNQAYQA
+----------------------MVE-S---------------------TTLV--------
+----------------------------------NQTWWY-----NPTV-----------
+-------DIHPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPAN
+MFIINLAMSDLSFSAINGFPLKTISAFM-KKWI--FGKVACQLYGLLGGIFGFMSINTMA
+MISIDRYNVIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVF-NWG-----AYVPE
+GILTSCSFDYLSTD--PSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMA
+KRLNAKEL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RK-AQ-AGASAEMKLAKISMVIITQFM
+LSWSPYAIIALLAQF---GPAE-WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFREAIQ
+TTFPWLLTCCQFD-EKECE--DANDAEE-EVVASER----GGESR------DAAQMKEMM
+AMMQKMQAQQAAYQP---PPPPQGYPPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQ
+GAPPQVEAPQGAPPQGVDNQAYQA
> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
-M--------------------------------GRDLRDNETWWYNPSI-----------
-------------------VVHPHWREFD----------QVPDAVYYSLGIFIGICGIIGC
-GGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFA
-ACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW
--AIGPIF-GWG-----AYTLEGVLCNCSFDYISRD--STTRSNILCMFILGFFGPILIIF
-FCYFNIVMSVSNHEKEMAAMAKRLNAKELR------------------------------
-------------------------------------------------------------
----------------------------------------KAQAGANAEMRLAKISIVIVS
-QFLLSWSPYAVVALLAQF--GPLEW-VTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREA
-ISQTFPWVLTCCQFDDKETEDDKDAET-EIPAGESSDAAPSADAAQMKEMMAMMQKMQQQ
-QAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQGAPP-----
--AAPPQGVDNQAYQA
+-----------------------MG-R---------------------DLRD--------
+----------------------------------NETWWY-----NPSI-----------
+-------VVHPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPAN
+MFIINLAFSDFTFSLVNGFPLMTISCFL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMA
+MISIDRYNVIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIF-GWG-----AYTLE
+GVLCNCSFDYISRD--STTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMA
+KRLNAKEL----------------------------------------------------
+------------------------------------------------------------
+---------------------------------RK-AQ-AGANAEMRLAKISIVIVSQFL
+LSWSPYAVVALLAQF---GPLE-WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFREAIS
+QTFPWVLTCCQFD-DKETE--DDKDAET-EIPAGES--SDAAPSA------DAAQMKEMM
+AMMQKMQQQQAAYPPQGYAPPPQGYPPQGYPPQGY--PPQGYPPQGYPP---PPQGAPPQ
+GAPP------AAPPQGVDNQAYQA
> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
-MMDVNSSGRPDLYGHLRSFLLPEVGRGLPDLSPDGGADPVAGSWAPHLLS---EVTA---
-------------------SPAPTWDAPPDNASGCGEQINYGRVEKVVIGSILTLITLLTI
-AGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHF
-FCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI
--TLPPLF-GWA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSVML
-FMYYQIYKAARKSAAKHKFP--------------GFPRVEPDSVIALNGIVKLQKEVEE-
-------------------------------------------------------------
----------------------------CANLSRLLKHERKNISIFKREQKAATTLGIIVG
-AFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTT
-YRSLL-----QCQYRNINRKLSAAGMH-EALKLAERPERPEFVLQNADYCRKKGHDS---
-------------------------------------------------------------
----------------
+M-------------------MDVNSSGRPDLY----GHLRSFL--LPEVGRGLPDLSPDG
+------------GADPVAGSWAPHLLS---EVTASPA---PTW----------------D
+APPDNASGCGEQI----NYGRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSN
+YLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLC
+VISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVND
+DKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG
+--------------FPRVEPDSVIALNG--------------------------------
+------------------------------IVKLQKE-------------------VEEC
+AN------------------LSRLLKH------ER-KNISIFKREQKAATTLGIIVGAFT
+VCWLPFFLLSTARPFICGTSCS-CIPLWVERTFLWLGYANSLINPFIYAFFNRDLRTTYR
+SLLQCQ--YRNIN-RKLSA----AGMHE-ALKLAER------PERPEFVLQNADYCRK--
+------------------------------------------------------------
+-------------------KGHDS
> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
-M--------------------------------------------PHLLSGFLEVTA---
-------------------SPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTI
-AGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHF
-FCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI
--TLPPLF-GWA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSVML
-FMYYQIYKAARKSAAKHKFP--------------GFPRVQPESVISLNGVVKLQKEVEE-
-------------------------------------------------------------
----------------------------CANLSRLLKHERKNISIFKREQKAATTLGIIVG
-AFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPT
-SRSLL-----QCQYRNINRKLSAAGMH-EALKLAERPERSEFVLQNSDHCGKKGHDT---
-------------------------------------------------------------
----------------
+------------------------------------------------------------
+---------------------MPHLLSGFLEVTASPA---PTW----------------D
+APPDNVSGCGEQI----NYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSN
+YLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLC
+VISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVND
+DKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG
+--------------FPRVQPESVISLNG--------------------------------
+------------------------------VVKLQKE-------------------VEEC
+AN------------------LSRLLKH------ER-KNISIFKREQKAATTLGIIVGAFT
+VCWLPFFLLSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRPTSR
+SLLQCQ--YRNIN-RKLSA----AGMHE-ALKLAER------PERSEFVLQNSDHCGK--
+------------------------------------------------------------
+-------------------KGHDT
> 31=p A47425 serotonin receptor 5HT-7 - rat
-M--------------------------------------------PHLLSGFLEVTA---
-------------------SPAPTWDAPPDNVSGCGEQINYGRVEKVVIGSILTLITLLTI
-AGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHF
-FCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI
--TLPPLF-GWA-----QNVNDDKVCLISQDF----------GYTIYSTAVAFYIPMSVML
-FMYYQIYKAARKSAAKHKFP--------------GFPRVQPESVISLNGVVKLQKEVEE-
-------------------------------------------------------------
----------------------------CANLSRLLKHERKNISIFKREQKAATTLGIIVG
-AFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTT
-YRSLL-----QCQYRNINRKLSAAGMH-EALKLAERPERSEFVLQNSDHCGKKGHDT---
-------------------------------------------------------------
----------------
+------------------------------------------------------------
+---------------------MPHLLSGFLEVTASPA---PTW----------------D
+APPDNVSGCGEQI----NYGRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSN
+YLIVSLALADLSVAVAV-MPFVSVTDLIGGKWI--FGHFFCNVFIAMDVMCCTASIMTLC
+VISIDRYLGITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLF-GWA-----QNVND
+DKVCLISQDF----------GYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKFPG
+--------------FPRVQPESVISLNG--------------------------------
+------------------------------VVKLQKE-------------------VEEC
+AN------------------LSRLLKH------ER-KNISIFKREQKAATTLGIIVGAFT
+VCWLPFFLLSTARPFICGTSCS-CIPLWVERTCLWLGYANSLINPFIYAFFNRDLRTTYR
+SLLQCQ--YRNIN-RKLSA----AGMHE-ALKLAER------PERSEFVLQNSDHCGK--
+------------------------------------------------------------
+-------------------KGHDT
> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-MDVLSPGQGNNTTSPPAPF-----------------------------------------
-------------------ETGGNTTGIS----------DVTVSYQVITSLLLGTLIFCAV
-LGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQV
-TCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI
--SIPPML-GWRTP---EDRSDPDACTISKDH----------GYTIYSTFGAFYIPLLLML
-VLYGRIFRAARFRIRKTVKKVEKTGADTRHGASPAPQPKKS---------VNGESGSRNW
-RLGVESKAGGALCANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAPASFER
-KNERNA-------------------------------EAKRKMALARERKTVKTLGIIMG
-TFILCWLPFFIVALVLPF-CESSCH-MPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNA
-FKKII-----KCKFCRQ-------------------------------------------
-------------------------------------------------------------
----------------
+--------------------MDVLSPG-------------------------------QG
+--------------------------N---NTTSPPA---PFE-----------------
+-TGGNTTGIS-------DVTVSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVAN
+YLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLC
+AIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP---EDRSD
+PDACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVKKV
+EKTGADTRHGASPAPQPKK-----SVNGE--SGSRNWRLGVESKAGGALCA---------
+------NGAVRQGD---------------------------------DGAALEVIEVHRV
+GNSKEHLPLPSEAG--PTPCAPASFERKNERNAEA-KRKMALARERKTVKTLGIIMGTFI
+LCWLPFFIVALVLPF---CESSCHMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK
+KIIKCK--FCRQ------------------------------------------------
+------------------------------------------------------------
+------------------------
> 33=p A35181 serotonin receptor class 1A - rat
-MDVFSFGQGNNTTASQEPF-----------------------------------------
-------------------GTGGNVTSIS----------DVTFSYQVITSLLLGTLIFCAV
-LGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQV
-TCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI
--SIPPML-GWRTP---EDRSDPDACTISKDH----------GYTIYSTFGAFYIPLLLML
-VLYGRIFRAARFRIRKTVRKVEKKGAGTSLGTSSAPPPKKS---------LNGQPGSGDW
-RRCAENRAVGTPCTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAPACLER
-KNERNA-------------------------------EAKRKMALARERKTVKTLGIIMG
-TFILCWLPFFIVALVLPF-CESSCH-MPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNA
-FKKII-----KCKFCRR-------------------------------------------
-------------------------------------------------------------
----------------
+--------------------MDVFSFG-------------------------------QG
+--------------------------N---NTTASQE---PFG-----------------
+-TGGNVTSIS-------DVTFSYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVAN
+YLIGSLAVTDLMVSVLV-LPMAALYQVL-NKWT--LGQVTCDLFIALDVLCCTSSILHLC
+AIALDRYWAITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPML-GWRTP---EDRSD
+PDACTISKDH----------GYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRKTVRKV
+EKKGAGTSLGTSSAPPPKK-----SLNGQ--PGSGDWRRCAENRAVGTPCT---------
+------NGAVRQGD---------------------------------DEATLEVIEVHRV
+GNSKEHLPLPSESG--SNSYAPACLERKNERNAEA-KRKMALARERKTVKTLGIIMGTFI
+LCWLPFFIVALVLPF---CESSCHMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQNAFK
+KIIKCK--FCRR------------------------------------------------
+------------------------------------------------------------
+------------------------
> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRDNASANDTSATNMTDDRYWSLT----------VYSHEHLVLTSVILGLFVLCCI
-IGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSE
-VCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI
--SIPPLF-GWRDP--NNDPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLVMM
-IIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKK
-KRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEE
-ASIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAIITG
-AFLICWLPFFIIALIGPF--VDPEG-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSA
-FQKIL-----FGKYRRGHR-----------------------------------------
-------------------------------------------------------------
----------------
+--------------------MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG
+LVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM----------
+-TDDRYWSLT-------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVAN
+YLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLV
+AIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NNDPDK
+TGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQM
+TKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNR
+AKKLPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQC
+NNGKK-------------------ISSNDTPYSRT-REKLELKRERKAARTLAIITGAFL
+ICWLPFFIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ
+KILFGK--YRRGH-----------------------------------------------
+------------------------------------------------------------
+-------------------R----
> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTGLVTSDFNDSYGLTGQFINGS
-HSSRSRDNASANDTSATNMTDDRYWSLT----------VYSHEHLVLTSVILGLFVLCCI
-IGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSE
-VCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI
--SIPPLF-GWRDP--NNDPDKTGTCIISQDK----------GYTIFSTVGAFYLPMLVMM
-IIYIRIWLVARSRIRKDKFQMTKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKK
-KRRAPFKSYGCSPRPERKKNRAKKLPENANGVNSNSSSSERLKQIQIETAEAFANGCAEE
-ASIAMLERQCNNGKKISSNDTPYS-------------RTREKLELKRERKAARTLAIITG
-AFLICWLPFFIIALIGPF--VDPEG-IPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSA
-FQKIL-----FGKYRRGHR-----------------------------------------
-------------------------------------------------------------
----------------
+--------------------MANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG
+LVTSDFNDSYGLTGQFINGSHSSRSRD---NASANDT--------SATNM----------
+-TDDRYWSLT-------VYSHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVAN
+YLILSLAVADLMVAVLV-MPLSVVSEIS-KVWF--LHSEVCDMWISVDVLCCTASILHLV
+AIAMDRYWAVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLF-GWRDP--NNDPDK
+TGTCIISQDK----------GYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQM
+TKARLKTEETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKKNR
+AKKLPENANGVNSNSSS----------SERLKQIQIETAEAFANGCAEEASIAML-ERQC
+NNGKK-------------------ISSNDTPYSRT-REKLELKRERKAARTLAIITGAFL
+ICWLPFFIIALIGPF---VDPE-GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRSAFQ
+KILFGK--YRRGH-----------------------------------------------
+------------------------------------------------------------
+-------------------R----
> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
-MEGAEGQEELDWEALYLRLPLQNCSWNSTGWEPNWNVTV---------------------
-------------------VPNTTWWQASAPF-----DTPAALVRAAAKAVVLGLLILATV
-VGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPE
-LCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV
--CIAQLL-GWKDPDWNQRVSEDLRCVVSQDV----------GYQIFATASSFYVPVLIIL
-ILYWRIYQTARKRIRRRRGATARGGV--------GPPPVPAGGALVAGGGSGG-------
------------------------------IAAAVVAVIGRPLPTISETTTTGFTNVSSNN
-TSPE--KQSCANGLEADPPTTGYGAVAAAYYPSLVRRKPKEAADSKRERKAAKTLAIITG
-AFVACWLPFFVLAILVPT---CDCE-VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHA
-FQRLL-----CGRRVRRRRAPQ--------------------------------------
-------------------------------------------------------------
----------------
+--------------------MEGAE-GQEELD-------------WEALYLRLP------
+-------------------------LQ---NCSWNSTGWEPNW--NVTVV----------
+-PNTTWWQASAPFDT--PAALVRAAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAAN
+NLILSLAVADLLVACLV-MPLGAVYEVV-QRWT--LGPELCDMWTSGDVLCCTASILHLV
+AIALDRYWAVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLL-GWKDPDWNQRVSE
+DLRCVVSQDV----------GYQIFATASSFYVPVLIILILYWRIYQTARKRIRRRRGAT
+ARGGVG--------PPPV------------------------------------------
+----PAGGALVAGGGSGGIAAAVVAVIGRPLPTISETTTTGFTNVSSNNTSPE---KQSC
+ANGLEADPPTTGYGAVAAAYYPSLVRR------KP-KEAADSKRERKAAKTLAIITGAFV
+ACWLPFFVLAILVPT---CDCE--VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRHAFQ
+RLLCGR--RVRRR-----------------------------------------------
+------------------------------------------------------------
+-------------------RAPQ-
> 1== M63632 1 Lampetra japonica rhodopsin <>[BBRC174,1125-1132'91]
-M-----------------------------------------------------------
-------------------------------NGTE--GDNFYV----PF------------
---------------------------SNKTGLARSPYEYPQY-YLA-----EPWK-----
-----YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMANLFMVLFG-F
-TVTMYTS-MN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYIVICKPMGN-F
-RFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGPDYYTLNPNFN
-NESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KEAAAAQQE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SASTQKAEKEVTRMV
-VLMVIGFLVCWVPYASVAFYIFT-HQG-S--DFGATFMTLPAFFAKSSALYNPVIYILMN
-KQFRNCMITTLC-C---GKNP------------------------------LGDDE--SG
-ASTSKT-E--VSSVS-TSPV----------------------------------------
----------------------------------------------SP-A---
+MN----------------------------------------------------------
+GTE--GDNFYVP-------------------FSNKTGLARSPYEYPQ---------Y-YL
+AEPWK---------YSALAAYMFFLILVGFPVNFLTLFVTVQHKKLRTPLNYILLNLAMA
+NLFMVLFG-FTVTMYTS-MN-GYFV--FGPTMCSIEGFFATLGGEVALWSLVVLAIERYI
+VICKPMGN-FRFGNTHAIMGVAFTWIMALAC-AAPPLVG-W-----SRYIPEGMQCSCGP
+DYYTLNPNFNNESYVVYMFVVHFLVPFVIIFFCYGRLLCTV----KE-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-AAAAQQ---------------------------------ESASTQKAEKEVTRMVVLMV
+IGFLVCWVPYASVAFYIFT-HQGS--DFGATFMTLPAFFAKSSALYNPVIYILMNKQFRN
+CMITTLC----C---GKNPLGD-DE--SGASTSKTEV-----------------------
+----------------------------------------------------------SS
+VS-------TSPVSP-A----------
> 2== U22180 1 rat opsin <rod>[J.Mol.Neurosci.5(3),207-209'94]
-M-----------------------------------------------------------
-------------------------------NGTE--GPNFYV----PF------------
---------------------------SNITGVVRSPFEQPQY-YLA-----EPWQ-----
-----FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVFGG-F
-TTTLYTS-LH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYVVVCKPMSN-F
-RFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGIDYYTLKPEVN
-NESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KEAAAQQQE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SATTQKAEKEVTRMV
-IIMVIFFLICWLPYASVAMYIFT-HQG-S--NFGPIFMTLPAFFAKTASIYNPIIYIMMN
-KQFRNCMLTSLC-C---GKNP------------------------------LGDDE--AS
-ATASKT-E--------TSQV----------------------------------------
----------------------------------------------AP-A---
+MN----------------------------------------------------------
+GTE--GPNFYVP-------------------FSNITGVVRSPFEQPQ---------Y-YL
+AEPWQ---------FSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLAVA
+DLFMVFGG-FTTTLYTS-LH-GYFV--FGPTGCNLEGFFATLGGEIGLWSLVVLAIERYV
+VVCKPMSN-FRFGENHAIMGVAFTWVMALAC-AAPPLVG-W-----SRYIPEGMQCSCGI
+DYYTLKPEVNNESFVIYMFVVHFTIPMIVIFFCYGQLVFTV----KE-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-AAAQQQ---------------------------------ESATTQKAEKEVTRMVIIMV
+IFFLICWLPYASVAMYIFT-HQGS--NFGPIFMTLPAFFAKTASIYNPIIYIMMNKQFRN
+CMLTSLC----C---GKNPLGD-DE--ASATASKTE------------------------
+------------------------------------------------------------
+---------TSQVAP-A----------
> 3== M92038 1 chicken green sensitive cone opsin <retina>[PNAS89,5932-5936'9
-M-----------------------------------------------------------
-------------------------------NGTE--GINFYV----PM------------
---------------------------SNKTGVVRSPFEYPQY-YLA-----EPWK-----
-----YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVADLFMACFG-F
-TVTFYTA-WN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYIVVCKPMGN-F
-RFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGPDYYTHNPDYH
-NESYVLYMFVIHFIIPVVVIFFSYGRLICKV----REAAAQQQE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SATTQKAEKEVTRMV
-ILMVLGFMLAWTPYAVVAFWIFT-NKG-A--DFTATLMAVPAFFSKSSSLYNPIIYVLMN
-KQFRNCMITTIC-C---GKNP------------------------------FGDEDVSST
-VSQSKT-E--VSSVS-SSQV----------------------------------------
----------------------------------------------SP-A---
+MN----------------------------------------------------------
+GTE--GINFYVP-------------------MSNKTGVVRSPFEYPQ---------Y-YL
+AEPWK---------YRLVCCYIFFLISTGLPINLLTLLVTFKHKKLRQPLNYILVNLAVA
+DLFMACFG-FTVTFYTA-WN-GYFV--FGPVGCAVEGFFATLGGQVALWSLVVLAIERYI
+VVCKPMGN-FRFSATHAMMGIAFTWVMAFSC-AAPPLFG-W-----SRYMPEGMQCSCGP
+DYYTHNPDYHNESYVLYMFVIHFIIPVVVIFFSYGRLICKV----RE-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-AAAQQQ---------------------------------ESATTQKAEKEVTRMVILMV
+LGFMLAWTPYAVVAFWIFT-NKGA--DFTATLMAVPAFFSKSSSLYNPIIYVLMNKQFRN
+CMITTIC----C---GKNPFGD-EDVSSTVSQSKTEV-----------------------
+----------------------------------------------------------SS
+VS-------SSQVSP-A----------
> 4=p A45229 opsin, green-sensitive (clone GFgr-1) - goldfish
-M-----------------------------------------------------------
-------------------------------NGTE--GKNFYV----PM------------
---------------------------SNRTGLVRSPFEYPQY-YLA-----EPWQ-----
-----FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVAGTIMVCFG-F
-TVTFYTA-IN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-F
-KFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGPDYYTLNPDYN
-NESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KAAAAQQQD----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SASTQKAEREVTKMV
-ILMVFGFLIAWTPYATVAAWIFF-NKG-A--DFSAKFMAIPAFFSKSSALYNPVIYVLLN
-KQFRNCMLTTIF-C---GKNP------------------------------LGDDE-SST
-VSTSKT-E--VSS------V----------------------------------------
----------------------------------------------SP-A---
+MN----------------------------------------------------------
+GTE--GKNFYVP-------------------MSNRTGLVRSPFEYPQ---------Y-YL
+AEPWQ---------FKILALYLFFLMSMGLPINGLTLVVTAQHKKLRQPLNFILVNLAVA
+GTIMVCFG-FTVTFYTA-IN-GYFV--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYI
+VVCKPMGS-FKFSSSHAFAGIAFTWVMALAC-AAPPLFG-W-----SRYIPEGMQCSCGP
+DYYTLNPDYNNESYVIYMFVCHFILPVAVIFFTYGRLVCTV----KA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-AAAQQQ---------------------------------DSASTQKAEREVTKMVILMV
+FGFLIAWTPYATVAAWIFF-NKGA--DFSAKFMAIPAFFSKSSALYNPVIYVLLNKQFRN
+CMLTTIF----C---GKNPLGD-DE-SSTVSTSKTEV-----------------------
+----------------------------------------------------------SS
+------------VSP-A----------
> 5=p B45229 opsin, green-sensitive (clone GFgr-2) - goldfish
-M-----------------------------------------------------------
-------------------------------NGTE--GNNFYV----PL------------
---------------------------SNRTGLVRSPFEYPQY-YLA-----EPWQ-----
-----FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVAGAIMVCFG-F
-TVTFYTA-IN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYIVVCKPMGS-F
-KFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGPDYYTLNPEYN
-NESYVLYMFICHFILPVTIIFFTYGRLVCTV----KAAAAQQQD----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SASTQKAEREVTKMV
-ILMVLGFLVAWTPYATVAAWIFF-NKG-A--AFSAQFMAIPAFFSKTSALYNPVIYVLLN
-KQFRSCMLTTLF-C---GKNP------------------------------LGDEE-SST
-VSTSKT-E--VSS------V----------------------------------------
----------------------------------------------SP-A---
+MN----------------------------------------------------------
+GTE--GNNFYVP-------------------LSNRTGLVRSPFEYPQ---------Y-YL
+AEPWQ---------FKLLAVYMFFLICLGLPINGLTLICTAQHKKLRQPLNFILVNLAVA
+GAIMVCFG-FTVTFYTA-IN-GYFA--LGPTGCAVEGFMATLGGEVALWSLVVLAIERYI
+VVCKPMGS-FKFSSTHASAGIAFTWVMAMAC-AAPPLVG-W-----SRYIPEGIQCSCGP
+DYYTLNPEYNNESYVLYMFICHFILPVTIIFFTYGRLVCTV----KA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-AAAQQQ---------------------------------DSASTQKAEREVTKMVILMV
+LGFLVAWTPYATVAAWIFF-NKGA--AFSAQFMAIPAFFSKTSALYNPVIYVLLNKQFRS
+CMLTTLF----C---GKNPLGD-EE-SSTVSTSKTEV-----------------------
+----------------------------------------------------------SS
+------------VSP-A----------
> 6== L11864 1 Carassius auratus blue cone opsin <retina>[Biochemistry32,208-
-M-----------------------------------------------------------
-------------------------------KQVPEFHEDFYI----PIPL----------
-------------------------DINNLS--AYSPFLVPQD-HLG-----NQGI-----
-----FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIANLFVAIFG-S
-PLSFYSF-FN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWLVICKPLGN-F
-TFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGPDWYTTNNKYN
-NESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KLAAKAQAD----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SASTQKAEREVTKMV
-VVMVLGFLVCWAPYASFSLWIVS-HRG-E--EFDLRMATIPSCLSKASTVYNPVIYVLMN
-KQFRSCMM-KMV-C---GKN-------------------------------IEEDE--AS
-TSSQVT-Q--VSS------V----------------------------------------
----------------------------------------------APEK---
+MK----------------------------------------------------------
+QVPEFHEDFYIPI---------------PLDINNLS--AYSPFLVPQ---------D-HL
+GNQGI---------FMAMSVFMFFIFIGGASINILTILCTIQFKKLRSHLNYILVNLSIA
+NLFVAIFG-SPLSFYSF-FN-RYFI--FGATACKIEGFLATLGGMVGLWSLAVVAFERWL
+VICKPLGN-FTFKTPHAIAGCILPWISALAA-SLPPLFG-W-----SRYIPEGLQCSCGP
+DWYTTNNKYNNESYVMFLFCFCFAVPFGTIVFCYGQLLITL----KL-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-AAKAQA---------------------------------DSASTQKAEREVTKMVVVMV
+LGFLVCWAPYASFSLWIVS-HRGE--EFDLRMATIPSCLSKASTVYNPVIYVLMNKQFRS
+CMM-KMV----C---GKN-IEE-DE--ASTSSQVTQV-----------------------
+----------------------------------------------------------SS
+------------VAPEK----------
> 7== M13299 1 human BCP <>[Science232(4747),193-202'86]
-M-----------------------------------------------------------
-------------------------------RKMS--EEEFYL-------F----------
---------------------------KNISSV--GPWDGPQY-HIA-----PVWA-----
-----FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFGGFLLCIFS-V
-FPVFVAS-CN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYIVICKPFGN-F
-RFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGPDWYTVGTKYR
-SESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KAVAAQQQE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SATTQKAEREVSRMV
-VVMVGSFCVCYVPYAAFAMYMVN-NRN-H--GLDLRLVTIPSFFSKSACIYNPIIYCFMN
-KQFQACIM-KMV-C---GKA-------------------------------MTDES--DT
-CSSQKT-E--VSTVS-STQV----------------------------------------
----------------------------------------------GP-N---
+MR----------------------------------------------------------
+KMS--EEEFYL--------------------FKNISSV--GPWDGPQ---------Y-HI
+APVWA---------FYLQAAFMGTVFLIGFPLNAMVLVATLRYKKLRQPLNYILVNVSFG
+GFLLCIFS-VFPVFVAS-CN-GYFV--FGRHVCALEGFLGTVAGLVTGWSLAFLAFERYI
+VICKPFGN-FRFSSKHALTVVLATWTIGIGV-SIPPFFG-W-----SRFIPEGLQCSCGP
+DWYTVGTKYRSESYTWFLFIFCFIVPLSLICFSYTQLLRAL----KA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-VAAQQQ---------------------------------ESATTQKAEREVSRMVVVMV
+GSFCVCYVPYAAFAMYMVN-NRNH--GLDLRLVTIPSFFSKSACIYNPIIYCFMNKQFQA
+CIM-KMV----C---GKA-MTD-ES--DTCSSQKTEV-----------------------
+----------------------------------------------------------ST
+VS-------STQVGP-N----------
> 8=opsin, greensensitive human (fragment) S07060
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
---------------------------------------------------DLAETVIA-S
-TISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-V
-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPG
-VQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RAVAKQQKE----------------
+DLAETVIA-STISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWL
+VVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGP
+DVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA-------------
------------------------------------------------------------
------------------------------------------------------------
----------------------------------------------SESTQKAEKEVTRMV
-VVMVLAFC----------------------------------------------------
+-VAKQQK---------------------------------ESESTQKAEKEVTRMVVVMV
+LAFC--------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
-----------------------------------------------------
+---------------------------
> 9== K03494 1 human GCP <>[Science232(4747),193-202'86]
-M-------------AQQWSLQRLA--GRHPQDSYE-------------------------
-------------------------------DSTQ--SSIFTY----TN------------
---------------------------SNST---RGPFEGPNY-HIA-----PRWV-----
-----YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S
-TISVVNQ-VY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWMVVCKPFGN-V
-RFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPG
-VQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RAVAKQQKE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SESTQKAEKEVTRMV
-VVMVLAFCFCWGPYAFFACFAAA-NPG-Y--PFHPLMAALPAFFAKSATIYNPVIYVFMN
-RQFRNCIL-QLF-----GKK-------------------------------VDDGS--EL
-SSASKT-E--VSSV---SSV----------------------------------------
----------------------------------------------SP-A---
+MAQQWSLQRLA---------------------------------------GRHPQDSYED
+STQ--SSIFTYT-------------------NSNST---RGPFEGPN---------Y-HI
+APRWV---------YHLTSVWMIFVVIASVFTNGLVLAATMKFKKLRHPLNWILVNLAVA
+DLAETVIA-STISVVNQ-VY-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWM
+VVCKPFGN-VRFDAKLAIVGIAFSWIWAAVW-TAPPIFG-W-----SRYWPHGLKTSCGP
+DVFSGSSYPGVQSYMIVLMVTCCITPLSIIVLCYLQVWLAI----RA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-VAKQQK---------------------------------ESESTQKAEKEVTRMVVVMV
+LAFCFCWGPYAFFACFAAA-NPGY--PFHPLMAALPAFFAKSATIYNPVIYVFMNRQFRN
+CILQLF---------GKK-VDD-GS--ELSSASKTEV-----------------------
+----------------------------------------------------------SS
+V---------SSVSP-A----------
> 10== Z68193 1 human Red Opsin <>[]
-M-------------AQQWSLQRLA--GRHPQDSYE-------------------------
-------------------------------DSTQ--SSIFTY----TN------------
---------------------------SNST---RGPFEGPNY-HIA-----PRWV-----
-----YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVADLAETVIA-S
-TISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWLVVCKPFGN-V
-RFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSYPG
-VQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RAVAKQQKE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SESTQKAEKEVTRMV
-VVMIFAYCVCWGPYTFFACFAAA-NPG-Y--AFHPLMAALPAYFAKSATIYNPVIYVFMN
-RQFRNCIL-QLF-----GKK-------------------------------VDDGS--EL
-SSASKT-E--VSSV---SSV----------------------------------------
----------------------------------------------SP-A---
+MAQQWSLQRLA---------------------------------------GRHPQDSYED
+STQ--SSIFTYT-------------------NSNST---RGPFEGPN---------Y-HI
+APRWV---------YHLTSVWMIFVVTASVFTNGLVLAATMKFKKLRHPLNWILVNLAVA
+DLAETVIA-STISIVNQ-VS-GYFV--LGHPMCVLEGYTVSLCGITGLWSLAIISWERWL
+VVCKPFGN-VRFDAKLAIVGIAFSWIWSAVW-TAPPIFG-W-----SRYWPHGLKTSCGP
+DVFSGSSYPGVQSYMIVLMVTCCIIPLAIIMLCYLQVWLAI----RA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-VAKQQK---------------------------------ESESTQKAEKEVTRMVVVMI
+FAYCVCWGPYTFFACFAAA-NPGY--AFHPLMAALPAYFAKSATIYNPVIYVFMNRQFRN
+CILQLF---------GKK-VDD-GS--ELSSASKTEV-----------------------
+----------------------------------------------------------SS
+V---------SSVSP-A----------
> 11== M92036 1 Gecko gecko P521 <retina>[PNAS89,6841-6845'92]
-M-------------TEAWNVAVFA--ARRSRDD-D-------------------------
-------------------------------DTTR--GSVFTY----TN------------
---------------------------TNNT---RGPFEGPNY-HIA-----PRWV-----
-----YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFVDLVETLVA-S
-TISVFNQ-IF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWFVVCKPFGN-I
-KFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSVELG
-CQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RAVAAQQKE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SESTQKAEREVSRMV
-VVMIVAFCICWGPYASFVSFAAA-NPG-Y--AFHPLAAALPAYFAKSATIYNPVIYVFMN
-RQFRNCIM-QLF-----GKK-------------------------------VDDGS--EA
-STTSRT-E--VSSVS-NSSV----------------------------------------
----------------------------------------------AP-A---
+MTEAWNVAVFA---------------------------------------ARRSRDD-DD
+TTR--GSVFTYT-------------------NTNNT---RGPFEGPN---------Y-HI
+APRWV---------YNLVSFFMIIVVIASCFTNGLVLVATAKFKKLRHPLNWILVNLAFV
+DLVETLVA-STISVFNQ-IF-GYFI--LGHPLCVIEGYVVSSCGITGLWSLAIISWERWF
+VVCKPFGN-IKFDSKLAIIGIVFSWVWAWGW-SAPPIFG-W-----SRYWPHGLKTSCGP
+DVFSGSVELGCQSFMLTLMITCCFLPLFIIIVCYLQVWMAI----RA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-VAAQQK---------------------------------ESESTQKAEREVSRMVVVMI
+VAFCICWGPYASFVSFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPVIYVFMNRQFRN
+CIMQLF---------GKK-VDD-GS--EASTTSRTEV-----------------------
+----------------------------------------------------------SS
+VS-------NSSVAP-A----------
> 12== M62903 1 chicken visual pigment <>[BBRC173,1212-1217'90]
-M-------------AA-WEAAFAA--RRRHEE--E-------------------------
-------------------------------DTTR--DSVFTY----TN------------
---------------------------SNNT---RGPFEGPNY-HIA-----PRWV-----
-----YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVADLGETVIA-S
-TISVINQ-IS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWFVVCKPFGN-I
-KFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGPDVFSGSSDPG
-VQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RAVAAQQKE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------SESTQKAEKEVSRMV
-VVMIVAYCFCWGPYTFFACFAAA-NPG-Y--AFHPLAAALPAYFAKSATIYNPIIYVFMN
-RQFRNCIL-QLF-----GKK-------------------------------VDDGS--EV
-ST-SRT-E--VSSVS-NSSV----------------------------------------
----------------------------------------------SP-A---
+MA-AWEAAFAA---------------------------------------RRRHEE--ED
+TTR--DSVFTYT-------------------NSNNT---RGPFEGPN---------Y-HI
+APRWV---------YNLTSVWMIFVVAASVFTNGLVLVATWKFKKLRHPLNWILVNLAVA
+DLGETVIA-STISVINQ-IS-GYFI--LGHPMCVVEGYTVSACGITALWSLAIISWERWF
+VVCKPFGN-IKFDGKLAVAGILFSWLWSCAW-TAPPIFG-W-----SRYWPHGLKTSCGP
+DVFSGSSDPGVQSYMVVLMVTCCFFPLAIIILCYLQVWLAI----RA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-VAAQQK---------------------------------ESESTQKAEKEVSRMVVVMI
+VAYCFCWGPYTFFACFAAA-NPGY--AFHPLAAALPAYFAKSATIYNPIIYVFMNRQFRN
+CILQLF---------GKK-VDD-GS--EVST-SRTEV-----------------------
+----------------------------------------------------------SS
+VS-------NSSVSP-A----------
> 13== S75720 1 chicken P-opsin <>[Science267(5203),1502-1506'95]
-M-------------S-------------------S-------------------------
-------------------------------NSSQ--AP----------------------
---------------------------PNGT---PGPFDGPQWPYQA-----PQST-----
-----YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVADLLVTLCG-S
-SVSLSNN-IN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYVVVCKPLGD-F
-QFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGPNWYTGGSNN-
--NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RAAAAQQKE----------------
-------------------------------------------------------------
-------------------------------------------------------------
----------------------------------------------ADTTQRAEREVTRMV
-IVMVMAFLLCWLPYSTFALVVAT-HKG-I--IIQPVLASLPSYFSKTATVYNPIIYVFMN
-KQFQSCLL-EMLCCGYQPQR-------------------------------TGKAS--PG
-TPGPHA-D--VTAAGLRNKV----------------------------------------
----------------------------------------------MP-AHPV
+MS--------------------------------------------------------SN
+SSQ--AP-------------------------PNGT---PGPFDGPQ---------WPYQ
+APQST---------YVGVAVLMGTVVACASVVNGLVIVVSICYKKLRSPLNYILVNLAVA
+DLLVTLCG-SSVSLSNN-IN-GFFV--FGRRMCELEGFMVSLTGIVGLWSLAILALERYV
+VVCKPLGD-FQFQRRHAVSGCAFTWGWALLW-SAPPLLG-W-----SSYVPEGLRTSCGP
+NWYTGGSNN--NSYILSLFVTCFVLPLSLILFSYTNLLLTL----RA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-AAAQQK---------------------------------EADTTQRAEREVTRMVIVMV
+MAFLLCWLPYSTFALVVAT-HKGI--IIQPVLASLPSYFSKTATVYNPIIYVFMNKQFQS
+CLLEMLC----CGYQPQR-TGK-AS--PGTPGPHADV-----------------------
+----------------------------------------------------------TA
+AG------LRNKVMP-AH-------PV
> 14== M17718 1 D.melanogaster Rh3 <>[J.Neurosci.7,1550-1557'87]
-M---------------------------------ES-G----------------------
-------------------------------NVSS---------------SLFGNVSTALR
-PEARLSA---ETRLLGWNVPPEELR------------------HIP-----EHWLTYPEP
-PESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFCDFMMMVK--T
-PIFIYNS-FH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFNVITRPMEG--
-KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTFDYLT--DNFD
-TRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVESL---------------------RS-NVDKNKETAEIRIAKAA
-ITICFLFFCSWTPYGVMSLIGAF-GDK-T--LLTPGATMIPACACKMVACIDPFVYAISH
-PRYRMELQ---------KRCP------------------WLALNEKAPE--SSAVA--ST
-STTQEP-Q-------------------------QT-------------------------
----------------------------------------------TA-A---
+MESGNVS-----------------------------------------------SSLFGN
+VSTALRPEARLSA---ETRLL----GW------------NVPPEELR-----------HI
+PEHWLTYPEPPESMNYLLGTLYIFFTLMSMLGNGLVIWVFSAAKSLRTPSNILVINLAFC
+DFMMMVK--TPIFIYNS-FH-QGYA--LGHLGCQIFGIIGSYTGIAAGATNAFIAYDRFN
+VITRPMEG--KMTHGKAIAMIIFIYMYATPW-VVACYTETW-----GRFVPEGYLTSCTF
+DYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-LRDQAKK--------------------MNVESL---RSNVDKNKETAEIRIAKAAITIC
+FLFFCSWTPYGVMSLIGAF-GDKT--LLTPGATMIPACACKMVACIDPFVYAISHPRYRM
+ELQKRCP----WLALNEKAPE--SS-AVASTSTTQEP-----------------------
+------------------------------------------------------------
+----------QQTTA-A----------
> 15== X65879 1 Drosophila pseudoobscura Dpse\Rh3 <>[Genetics132(1),193-204'92
-M---------------------------------EY-H----------------------
-------------------------------NVSS---------------VL-GNVSSVLR
-PDARLSA---ESRLLGWNVPPDELR------------------HIP-----EHWLIYPEP
-PESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFCDFMMMIK--T
-PIFIYNS-FH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYNVITRPMEG--
-KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTFDYLT--DNFD
-TRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKALRDQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVDSL---------------------RS-NVDKSKEAAEIRIAKAA
-ITICFLFFASWTPYGVMSLIGAF-GDK-T--LLTPGATMIPACTCKMVACIDPFVYAISH
-PRYRMELQ---------KRCP------------------WLAISEKAPE--SRAAI--ST
-STTQEQ-Q-------------------------QT-------------------------
----------------------------------------------TA-A---
+MEYHNVS-----------------------------------------------SVL-GN
+VSSVLRPDARLSA---ESRLL----GW------------NVPPDELR-----------HI
+PEHWLIYPEPPESMNYLLGTLYIFFTVISMIGNGLVMWVFSAAKSLRTPSNILVINLAFC
+DFMMMIK--TPIFIYNS-FH-QGYA--LGHLGCQIFGVIGSYTGIAAGATNAFIAYDRYN
+VITRPMEG--KMTHGKAIAMIIFIYLYATPW-VVACYTESW-----GRFVPEGYLTSCTF
+DYLT--DNFDTRLFVACIFFFSFVCPTTMITYYYSQIVGHVFSHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-LRDQAKK--------------------MNVDSL---RSNVDKSKEAAEIRIAKAAITIC
+FLFFASWTPYGVMSLIGAF-GDKT--LLTPGATMIPACTCKMVACIDPFVYAISHPRYRM
+ELQKRCP----WLAISEKAPE--SR-AAISTSTTQEQ-----------------------
+------------------------------------------------------------
+----------QQTTA-A----------
> 16== M17730 1 D.melanogaster Rh4 opsin <>[J.Neurosci.7,1558-1566'87]
-M---------------------------------EPLC----------------------
-------------------------------NASE---------------PP-------LR
-PEAR-SSGNGDLQFLGWNVPPDQIQ------------------YIP-----EHWLTQLEP
-PASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A
-PIF--NS-FH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--
-NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFD
-TRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKALREQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVESL---------------------RS-NVDKSKETAEIRIAKAA
-ITICFLFFVSWTPYGVMSLIGAF-GDK-S--LLTQGATMIPACTCKLVACIDPFVYAISH
-PRYRLELQ---------KRCP------------------WLGVNEKSGE--ISSAQ--ST
--TTQEQ-Q-------------------------QT-------------------------
----------------------------------------------TA-A---
+ME-----------------------------------------------------PLCNA
+SEPPLRPEAR-SSGNGDLQFL----GW------------NVPPDQIQ-----------YI
+PEHWLTQLEPPASMHYMLGVFYIFLFCASTVGNGMVIWIFSTSKSLRTPSNMFVLNLAVF
+DLIMCLK--APIF--NS-FH-RGFAIYLGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYN
+VITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSF
+DYLS--DNFDTRLFVGTIFFFSFVCPTLMILYYYSQIVGHVFSHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-LREQAKK--------------------MNVESL---RSNVDKSKETAEIRIAKAAITIC
+FLFFVSWTPYGVMSLIGAF-GDKS--LLTQGATMIPACTCKLVACIDPFVYAISHPRYRL
+ELQKRCP----WLGVNEKSGE--IS-SAQST-TTQEQ-----------------------
+------------------------------------------------------------
+----------QQTTA-A----------
> 17== X65880 1 Drosophila pseudoobscura Dpse\Rh4 <>[Genetics132(1),193-204'92
-M---------------------------------DALC----------------------
-------------------------------NASE---------------PP-------LR
-PEARMSSGSDELQFLGWNVPPDQIQ------------------YIP-----EHWLTQLEP
-PASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVFDLIMCLK--A
-PIFIYNS-FH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYNVITKPMNR--
-NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSFDYLS--DNFD
-TRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKALREQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVESL---------------------RS-NVDKSKETAEIRIAKAA
-ITICFLFFVSWTPYGVMSLIGAF-GDK-S--LLTPGATMIPACTCKLVACIEPFVYAISH
-PRYRMELQ---------KRCP------------------WLGVNEKSGE--ASSAQ--ST
--TTQEQTQ-------------------------QT-------------------------
----------------------------------------------SA-A---
+MD-----------------------------------------------------ALCNA
+SEPPLRPEARMSSGSDELQFL----GW------------NVPPDQIQ-----------YI
+PEHWLTQLEPPASMHYMLGVFYIFLFFASTLGNGMVIWIFSTSKSLRTPSNMFVLNLAVF
+DLIMCLK--APIFIYNS-FH-RGFA--LGNTWCQIFASIGSYSGIGAGMTNAAIGYDRYN
+VITKPMNR--NMTFTKAVIMNIIIWLYCTPW-VVLPLTQFW-----DRFVPEGYLTSCSF
+DYLS--DNFDTRLFVGTIFLFSFVVPTLMILYYYSQIVGHVFNHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-LREQAKK--------------------MNVESL---RSNVDKSKETAEIRIAKAAITIC
+FLFFVSWTPYGVMSLIGAF-GDKS--LLTPGATMIPACTCKLVACIEPFVYAISHPRYRM
+ELQKRCP----WLGVNEKSGE--AS-SAQST-TTQEQ-----------------------
+------------------------------------------------------------
+---------TQQTSA-A----------
> 18== D50584 1 Hemigrapsus sanguineus opsin BcRh2 <compound eye>[J.Exp.Biol.1
-M---------------------------------------------TNATGPQMAYYG--
----AASMDFGYPE-----------------GVSI---------------VD---------
------------------FVRPEIKP------------------YVH-----QHWYNYPPV
-NPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALSDLIMLTTN-V
-PFFTYNC-FSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYNIICNGFNG-P
-KLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSYDYLT--QDFN
-TFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAAMRAQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVSTL---------------------RS-N-EADAQRAEIRIAKTA
-LVNVSLWFICWTPYALISLKGVM-GDT-S--GITPLVSTLPALLAKSCSCYNPFVYAISH
-PKYRLAIT---------QHLP------------------WFCVHETETKS-NDDSQ--SN
-STVAQ-------------------------------------------------------
----------------------------------------------DK-A---
+MT----------------------------------------------------------
+--NATGPQMAYYGAASMD-FG-------Y--PEGVSIVDFVRPEIKP-----------YV
+HQHWYNYPPVNPMWHYLLGVIYLFLGTVSIFGNGLVIYLFNKSAALRTPANILVVNLALS
+DLIMLTTN-VPFFTYNC-FSGGVWM--FSPQYCEIYACLGAITGVCSIWLLCMISFDRYN
+IICNGFNG-PKLTTGKAVVFALISWVIAIGC-ALPPFFG-W-----GNYILEGILDSCSY
+DYLT--QDFNTFSYNIFIFVFDYFLPAAIIVFSYVFIVKAIFAHEAA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MRAQAKK--------------------MNVSTL---RS-NEADAQRAEIRIAKTALVNV
+SLWFICWTPYALISLKGVM-GDTS--GITPLVSTLPALLAKSCSCYNPFVYAISHPKYRL
+AITQHLP----WFCVHETETKS-ND-DSQSNSTVAQ------------------------
+------------------------------------------------------------
+-------------DK-A----------
> 19== D50583 1 Hemigrapsus sanguineus opsin BcRh1 <compound eye>[J.Exp.Biol.1
-M---------------------------------------------ANVTGPQMAFYG--
----SGAATFGYPE-----------------GMTV---------------AD---------
------------------FVPDRVKH------------------MVL-----DHWYNYPPV
-NPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALSDLIMLTTN-F
-PPFCYNC-FSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYNIICNGFNG-P
-KLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSYDYFT--RDMN
-TITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAAMRAQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVTNL---------------------RS-N-EAETQRAEIRIAKTA
-LVNVSLWFICWTPYAAITIQGLL-GNA-E--GITPLLTTLPALLAKSCSCYNPFVYAISH
-PKFRLAIT---------QHLP------------------WFCVHEKDPND-VEENQ--SS
-NTQTQ-------------------------------------------------------
----------------------------------------------EK-S---
+MA----------------------------------------------------------
+--NVTGPQMAFYGSGAAT-FG-------Y--PEGMTVADFVPDRVKH-----------MV
+LDHWYNYPPVNPMWHYLLGVVYLFLGVISIAGNGLVIYLYMKSQALKTPANMLIVNLALS
+DLIMLTTN-FPPFCYNC-FSGGRWM--FSGTYCEIYAALGAITGVCSIWTLCMISFDRYN
+IICNGFNG-PKLTQGKATFMCGLAWVISVGW-SLPPFFG-W-----GSYTLEGILDSCSY
+DYFT--RDMNTITYNICIFIFDFFLPASVIVFSYVFIVKAIFAHEAA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MRAQAKK--------------------MNVTNL---RS-NEAETQRAEIRIAKTALVNV
+SLWFICWTPYAAITIQGLL-GNAE--GITPLLTTLPALLAKSCSCYNPFVYAISHPKFRL
+AITQHLP----WFCVHEKDPND-VE-ENQSSNTQTQ------------------------
+------------------------------------------------------------
+-------------EK-S----------
> 20== K02320 1 D.melanogaster opsin <>[Cell40,851-858'85]
-M---------------------------------E------SFAVAAAQLGPHFAPLS--
-------------------------------NGSV---------------VD---------
------------------KVTPDMAH------------------LIS-----PYWNQFPAM
-DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T
-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-R
-PMTIPLALGKM---------------------------YVPEGNLTSCGIDYLE--RDWN
-PRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVKSL---------------------RS-S-EDAEKSAEGKLAKVA
-LVTITLWFMAWTPYLVINCMGLF--KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH
-PKYRLALK---------EKCP------------------CCVFGKVDDGK-SSDAQ--SQ
-ATASEA-E----------------------------------------------------
----------------------------------------------SK-A---
+ME------------------------------------------------------SFAV
+AAAQLGPHFAPLS--------------------NGSVVDKVTPDMAH-----------LI
+SPYWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAIS
+DFGIMITN-TPMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ
+VIVKGMAG-RPMTIPLALGKM---------------------------YVPEGNLTSCGI
+DYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MREQAKK--------------------MNVKSL---RS-SEDAEKSAEGKLAKVALVTI
+TLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRL
+ALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA-----------------------
+----------------------------------------------------------E-
+-------------SK-A----------
> 21== K02315 1 D.melanogaster ninaE <>[Cell40,839-850'85]
-M---------------------------------E------SFAVAAAQLGPHFAPLS--
-------------------------------NGSV---------------VD---------
------------------KVTPDMAH------------------LIS-----PYWNQFPAM
-DPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T
-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-R
-PMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGIDYLE--RDWN
-PRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVKSL---------------------RS-S-EDAEKSAEGKLAKVA
-LVTITLWFMAWTPYLVINCMGLF--KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH
-PKYRLALK---------EKCP------------------CCVFGKVDDGK-SSDAQ--SQ
-ATASEA-E----------------------------------------------------
----------------------------------------------SK-A---
+ME------------------------------------------------------SFAV
+AAAQLGPHFAPLS--------------------NGSVVDKVTPDMAH-----------LI
+SPYWNQFPAMDPIWAKILTAYMIMIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAIS
+DFGIMITN-TPMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ
+VIVKGMAG-RPMTIPLALGKIAYIWFMSSIW-CLAPAFG-W-----SRYVPEGNLTSCGI
+DYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MREQAKK--------------------MNVKSL---RS-SEDAEKSAEGKLAKVALVTI
+TLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRL
+ALKEKCP----CCVFGKVDDGK-SS-DAQSQATASEA-----------------------
+----------------------------------------------------------E-
+-------------SK-A----------
> 22== X65877 1 Drosophila pseudoobscura Dpse\ninaE <>[Genetics132(1),193-204'
-M---------------------------------D------SFAAVATQLGPQFAAPS--
-------------------------------NGSV---------------VD---------
------------------KVTPDMAH------------------LIS-----PYWDQFPAM
-DPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAISDFGIMITN-T
-PMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQVIVKGMAG-R
-PMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGIDYLE--RDWN
-PRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKAMREQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVKSL---------------------RS-S-EDADKSAEGKLAKVA
-LVTISLWFMAWTPYLVINCMGLF--KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISH
-PKYRLALK---------EKCP------------------CCVFGKVDDGK-SSEAQ--SQ
-ATTSEA-E----------------------------------------------------
----------------------------------------------SK-A---
+MD------------------------------------------------------SFAA
+VATQLGPQFAAPS--------------------NGSVVDKVTPDMAH-----------LI
+SPYWDQFPAMDPIWAKILTAYMIIIGMISWCGNGVVIYIFATTKSLRTPANLLVINLAIS
+DFGIMITN-TPMMGINL-YF-ETWV--LGPMMCDIYAGLGSAFGCSSIWSMCMISLDRYQ
+VIVKGMAG-RPMTIPLALGKIAYIWFMSTIWCCLAPVFG-W-----SRYVPEGNLTSCGI
+DYLE--RDWNPRSYLIFYSIFVYYIPLFLICYSYWFIIAAVSAHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MREQAKK--------------------MNVKSL---RS-SEDADKSAEGKLAKVALVTI
+SLWFMAWTPYLVINCMGLF-KF-E--GLTPLNTIWGACFAKSAACYNPIVYGISHPKYRL
+ALKEKCP----CCVFGKVDDGK-SS-EAQSQATTSEA-----------------------
+----------------------------------------------------------E-
+-------------SK-A----------
> 23== M12896 1 D.melanogaster Rh2 <>[Cell44,705-710'86]
-M---------------------------------ER-SHLPETPFDLAHSGPRFQAQS--
----SG-------------------------NGSV---------------LD---------
------------------NVLPDMAH------------------LVN-----PYWSRFAPM
-DPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S
-PVMIINF-YY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYNVIVKGING-T
-PMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSIDYMT--RMWN
-PRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKAMREQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVKSL---------------------RS-S-EDCDKSAEGKLAKVA
-LTTISLWFMAWTPYLVICYFGLF--KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH
-PKYRIVLK---------EKCP------------------MCVFGNTDEPKPDAPAS--DT
-ETTSEA-D----------------------------------------------------
----------------------------------------------SK-A---
+MERSHL-------------------------------------------------PETPF
+DLAHSGPRFQAQSSG------------------NGSVLDNVLPDMAH-----------LV
+NPYWSRFAPMDPMMSKILGLFTLAIMIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFS
+DFCMMASQ-SPVMIINF-YY-ETWV--LGPLWCDIYAGCGSLFGCVSIWSMCMIAFDRYN
+VIVKGING-TPMTIKTSIMKILFIWMMAVFW-TVMPLIG-W-----SAYVPEGNLTACSI
+DYMT--RMWNPRSYLITYSLFVYYTPLFLICYSYWFIIAAVAAHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MREQAKK--------------------MNVKSL---RS-SEDCDKSAEGKLAKVALTTI
+SLWFMAWTPYLVICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPKYRI
+VLKEKCP----MCVFGNTDEPKPDA-PASDTETTSEA-----------------------
+----------------------------------------------------------D-
+-------------SK-A----------
> 24== X65878 1 Drosophila pseudoobscura Dpse\Rh2 <>[Genetics132(1),193-204'92
-M---------------------------------ER-SLLPEPPLAMALLGPRFEAQT--
----GG-------------------------NRSV---------------LD---------
------------------NVLPDMAP------------------LVN-----PHWSRFAPM
-DPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFSDFCMMASQ-S
-PVMIINF-YY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYNVIVKGING-T
-PMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSIDYMT--RQWN
-PRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKAMRDQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVKSL---------------------RS-S-EDCDKSAENKLAKVA
-LTTISLWFMAWTPYLIICYFGLF--KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISH
-PNDRLVLK---------EKCP------------------MCVCGTTDEPKPDAPPS--DT
-ETTSEA-E----------------------------------------------------
----------------------------------------------SK-D---
+MERSLL-------------------------------------------------PEPPL
+AMALLGPRFEAQTGG------------------NRSVLDNVLPDMAP-----------LV
+NPHWSRFAPMDPTMSKILGLFTLVILIISCCGNGVVVYIFGGTKSLRTPANLLVLNLAFS
+DFCMMASQ-SPVMIINF-YY-ETWV--LGPLWCDIYAACGSLFGCVSIWSMCMIAFDRYN
+VIVKGING-TPMTIKTSIMKIAFIWMMAVFW-TIMPLIG-W-----SSYVPEGNLTACSI
+DYMT--RQWNPRSYLITYSLFVYYTPLFMICYSYWFIIATVAAHEKA-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MRDQAKK--------------------MNVKSL---RS-SEDCDKSAENKLAKVALTTI
+SLWFMAWTPYLIICYFGLF-KI-D--GLTPLTTIWGATFAKTSAVYNPIVYGISHPNDRL
+VLKEKCP----MCVCGTTDEPKPDA-PPSDTETTSEA-----------------------
+----------------------------------------------------------E-
+-------------SK-D----------
> 25== U26026 1 Apis mellifera long-wavelength rhodopsin <>[]
-M---------------------------------------------IAVSGPSYEAFS--
----YG----GQAR---------------FNNQTV---------------VD---------
------------------KVPPDMLH------------------LID-----ANWYQYPPL
-NPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAISNFLMMFCM-S
-PPMVINC-YY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYNVIVKGLSG-K
-PLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGTDYFN--RGLL
-SASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKNMREQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVASL---------------------RS-S-ENQNTSAECKLAKVA
-LMTISLWFMAWTPYLVINFSGIF--NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISH
-PKYRAALF---------AKFP------------------SLAC-AAEPSS-DAVST--TS
-GTTTVT-D----------------------------------------------------
------------------------------------------NEK-SN-A---
+MI----------------------------------------------------------
+--AVSGPSYEAFSYGGQARF------------NNQTVVDKVPPDMLH-----------LI
+DANWYQYPPLNPMWHGILGFVIGMLGFVSAMGNGMVVYIFLSTKSLRTPSNLFVINLAIS
+NFLMMFCM-SPPMVINC-YY-ETWV--LGPLFCQIYAMLGSLFGCGSIWTMTMIAFDRYN
+VIVKGLSG-KPLSINGALIRIIAIWLFSLGW-TIAPMFG-W-----NRYVPEGNMTACGT
+DYFN--RGLLSASYLVCYGIWVYFVPLFLIIYSYWFIIQAVAAHEKN-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MREQAKK--------------------MNVASL---RS-SENQNTSAECKLAKVALMTI
+SLWFMAWTPYLVINFSGIF-NL-V--KISPLFTIWGSLFAKANAVYNPIVYGISHPKYRA
+ALFAKFP----SLAC-AAEPSS-DA-VSTTSGTTTVT-----------------------
+----------------------------------------------------------DN
+EK-----------SN-A----------
> 26== L03781 1 Limulus polyphemus opsin <>[PNAS90,6150-6154'93]
-M-----------------------------------------------ANQLSYSSLG--
----WP----YQP------------------NASV---------------VD---------
------------------TMPKEMLY------------------MIH-----EHWYAFPPM
-NPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFSDFCMMAFM-M
-PTMTSNC-FA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYNVIVRGMAA-A
-PLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTVDYLT--KDWS
-SASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQLREQAKK----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------MNVASL---------------------RANA-DQQKQSAECRLAKVA
-MMTVGLWFMAWTPYLIISWAGVF--SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISH
-PRYKAALY---------QRFP------------------SLACGSGESGS-DVKSE--AS
-ATTTME-E----------------------------------------------------
------------------------------------------KPKIPE-A---
-> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
M-----------------------------------------------------------
----------VESTTLVNQTW-------WY-NPTV---------------D----------
---------------------------------------------IH-----PHWAKFDPI
-PDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMSDLSFSAINGF
-PLKTISA-FM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYNVIGRPMAASK
-KMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSFDYLS--TDPS
-TRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKEMAAMAKR----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------LNAKEL---------------------RK---AQAGASAEMKLAKIS
-MVIITQFMLSWSPYAIIALLAQF-GPA-E--WVTPYAAELPVLFAKASAIHNPIVYSVSH
-PKFREAIQ---------TTFPWL--------------LTCCQFDEKECED-ANDAE--EE
-VVASER---GGESRD-AAQMKEMMAMMQKMQAQQAAYQPPPPPQGY--PPQGYPPQGAYP
-PPQGYPPQGYPPQGYPPQGYPPQGAPPQVEAPQGAPPQGVDNQA-YQ-A---
+---ANQLSYSSLGWPYQP---------------NASVVDTMPKEMLY-----------MI
+HEHWYAFPPMNPLWYSILGVAMIILGIICVLGNGMVIYLMMTTKSLRTPTNLLVVNLAFS
+DFCMMAFM-MPTMTSNC-FA-ETWI--LGPFMCEVYGMAGSLFGCASIWSMVMITLDRYN
+VIVRGMAA-APLTHKKATLLLLFVWIWSGGW-TILPFFG-W-----SRYVPEGNLTSCTV
+DYLT--KDWSSASYVVIYGLAVYFLPLITMIYCYFFIVHAVAEHEKQ-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-LREQAKK--------------------MNVASL---RANADQQKQSAECRLAKVAMMTV
+GLWFMAWTPYLIISWAGVF-SSGT--RLTPLATIWGSVFAKANSCYNPIVYGISHPRYKA
+ALYQRFP----SLACGSGESGS-DV-KSEASATTTME-----------------------
+----------------------------------------------------------EK
+PK----------IPE-A----------
+> 27== X07797 1 Octopus dofleini rhodopsin <>[FEBS232(1),69-72'88]
+MVES--------------------------------------------------------
+-------------------TTLVNQTWWY----NPTVD---------------------I
+HPHWAKFDPIPDAVYYSVGIFIGVVGIIGILGNGVVIYLFSKTKSLQTPANMFIINLAMS
+DLSFSAINGFPLKTISA-FM-KKWI--FGKVACQLYGLLGGIFGFMSINTMAMISIDRYN
+VIGRPMAASKKMSHRRAFLMIIFVWMWSIVW-SVGPVFN-W-----GAYVPEGILTSCSF
+DYLS--TDPSTRSFILCMYFCGFMLPIIIIAFCYFNIVMSVSNHEKE-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MAAMAKR--------------------LNAKEL---R--KAQAGASAEMKLAKISMVII
+TQFMLSWSPYAIIALLAQF-GPAE--WVTPYAAELPVLFAKASAIHNPIVYSVSHPKFRE
+AIQTTFPWLLTCCQFDEKECED-AN-DAEEEVVASER--GGESRDAAQMKEMMAMMQKMQ
+AQQAAYQPPPPPQGY--PPQGYPPQGAYPPPQGYPPQGYPPQGYPPQGYPPQGAPPQVEA
+PQGAPPQGVDNQAYQ-A----------
> 28== X70498 1 Todarodes pacificus rhodopsin <retina>[FEBS317(1-2),5-11'93]
-M-----------------------------------------------------------
----------GRDLR-DNETW-------WY-NPSI---------------V----------
---------------------------------------------VH-----PHWREFDQV
-PDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFSDFTFSLVNGF
-PLMTISC-FL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYNVIGRPMAASK
-KMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSFDYIS--RDST
-TRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKEMAAMAKR----------------
-------------------------------------------------------------
-------------------------------------------------------------
--------------LNAKEL---------------------RK---AQAGANAEMRLAKIS
-IVIVSQFLLSWSPYAVVALLAQF-GPL-E--WVTPYAAQLPVMFAKASAIHNPMIYSVSH
-PKFREAIS---------QTFPWV--------------LTCCQFDDKETED-DKDAE--TE
-IPAGES-SDAAPSAD-AAQMKEMMAMMQKMQQQQAAY----PPQGYAPPPQGYPPQGY--
-PPQGYPPQGYPPQGYPP---PPQGAPPQ-GAPPAAPPQGVDNQA-YQ-A---
+MGRD--------------------------------------------------------
+-------------------LR-DNETWWY----NPSIV---------------------V
+HPHWREFDQVPDAVYYSLGIFIGICGIIGCGGNGIVIYLFTKTKSLQTPANMFIINLAFS
+DFTFSLVNGFPLMTISC-FL-KKWI--FGFAACKVYGFIGGIFGFMSIMTMAMISIDRYN
+VIGRPMAASKKMSHRRAFIMIIFVWLWSVLW-AIGPIFG-W-----GAYTLEGVLCNCSF
+DYIS--RDSTTRSNILCMFILGFFGPILIIFFCYFNIVMSVSNHEKE-------------
+------------------------------------------------------------
+------------------------------------------------------------
+-MAAMAKR--------------------LNAKEL---R--KAQAGANAEMRLAKISIVIV
+SQFLLSWSPYAVVALLAQF-GPLE--WVTPYAAQLPVMFAKASAIHNPMIYSVSHPKFRE
+AISQTFPWVLTCCQFDDKETED-DK-DAETEIPAGESSDAAPSADAAQMKEMMAMMQKMQ
+QQQAAY----PPQGYAPPPQGYPPQGY--PPQGYPPQGYPPQGYPP---PPQGAPPQ-GA
+PPAAPPQGVDNQAYQ-A----------
> 29== L21195 1 human serotonin 5-HT7 receptor protein <placenta and fetal bra
-MMDVNSSGRPDL----YGHLRSFL--LPE-----------------VGRGLPDLSPDGGA
-DPVAG-------------SWAPHLLS----EVTA--SPAPTW------------------
------------------DAPPDNAS-GCGE-------------QIN-------------Y
-GRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALADLSVAVAV-M
-PFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPV
-RQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------
---GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF-------------
-----PGF---------------------------------PRVEPDS-------------
---------VIALNGIVKLQKEV--------------------------------------
---------EE--CANLSRLLKH------------------ER-K-NISIFKREQKAATTL
-GIIVGAFTVCWLPFFLLSTARPFICGT-SCSCIPLWVERTFLWLGYANSLINPFIYAFFN
-RDLRTTYR-SLL----------QC---QYRNINRKLSA-------------AGMHE--AL
-KLAERP-E--------RPEF-----VL------QN-------------------------
-------ADYCRKKGHD--------------------------------S---
+MMDVNSSGRPDLY----GHLRSFLLP--EVGRGLPDLSPDGGADPVAGSWAPHLLS---E
+VTASPAPTW------------------------------DAPPDNAS--GCGEQINY---
+----------GRVEKVVIGSILTLITLLTIAGNCLVVISVCFVKKLRQPSNYLIVSLALA
+DLSVAVAV-MPFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL
+GITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLI
+S--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF----------
+-------PGF---------------------------------PRVEPDSVI--------
+--------------ALNGIVK----------------------LQKEVEECAN-------
+-LSRLLKH-----------------------------ER-KNISIFKREQKAATTLGIIV
+GAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTFLWLGYANSLINPFIYAFFNRDLRT
+TYRSLLQ--------CQYRNIN----RKLSAAGMHEA-------------------LKLA
+----------------------------------------------------------ER
+PE------RPEFVLQNADYCRKKGHDS
> 30== L15228 1 rat 5HT-7 serotonin receptor <>[JBC268,18200-18204'93]
-M-----------------------------------------------------------
----------------------PHLLSGFL-EVTA--SPAPTW------------------
------------------DAPPDNVS-GCGE-------------QIN-------------Y
-GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M
-PFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPV
-RQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------
---GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF-------------
-----PGF---------------------------------PRVQPES-------------
---------VISLNGVVKLQKEV--------------------------------------
---------EE--CANLSRLLKH------------------ER-K-NISIFKREQKAATTL
-GIIVGAFTVCWLPFFLLSTARPFICGT-SCSCIPLWVERTCLWLGYANSLINPFIYAFFN
-RDLRPTSR-SLL----------QC---QYRNINRKLSA-------------AGMHE--AL
-KLAERP-E--------RSEF-----VL------QN-------------------------
-------SDHCGKKGHD--------------------------------T---
+M--------------------------------------------------PHLLSGFLE
+VTASPAPTW------------------------------DAPPDNVS--GCGEQINY---
+----------GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALA
+DLSVAVAV-MPFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL
+GITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLI
+S--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF----------
+-------PGF---------------------------------PRVQPESVI--------
+--------------SLNGVVK----------------------LQKEVEECAN-------
+-LSRLLKH-----------------------------ER-KNISIFKREQKAATTLGIIV
+GAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRP
+TSRSLLQ--------CQYRNIN----RKLSAAGMHEA-------------------LKLA
+----------------------------------------------------------ER
+PE------RSEFVLQNSDHCGKKGHDT
> 31=p A47425 serotonin receptor 5HT-7 - rat
-M-----------------------------------------------------------
----------------------PHLLSGFL-EVTA--SPAPTW------------------
------------------DAPPDNVS-GCGE-------------QIN-------------Y
-GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALADLSVAVAV-M
-PFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYLGITRPLTYPV
-RQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDKVCLISQDF--------
---GYTIYSTAVAFYIPMSVMLFMYYQIY-------KAARKSAAKHKF-------------
-----PGF---------------------------------PRVQPES-------------
---------VISLNGVVKLQKEV--------------------------------------
---------EE--CANLSRLLKH------------------ER-K-NISIFKREQKAATTL
-GIIVGAFTVCWLPFFLLSTARPFICGT-SCSCIPLWVERTCLWLGYANSLINPFIYAFFN
-RDLRTTYR-SLL----------QC---QYRNINRKLSA-------------AGMHE--AL
-KLAERP-E--------RSEF-----VL------QN-------------------------
-------SDHCGKKGHD--------------------------------T---
+M--------------------------------------------------PHLLSGFLE
+VTASPAPTW------------------------------DAPPDNVS--GCGEQINY---
+----------GRVEKVVIGSILTLITLLTIAGNCLVVISVSFVKKLRQPSNYLIVSLALA
+DLSVAVAV-MPFVSVTDLIG-GKWI--FGHFFCNVFIAMDVMCCTASIMTLCVISIDRYL
+GITRPLTYPVRQNGKCMAKMILSVWLLSASI-TLPPLFG-W-----AQNVNDDK--VCLI
+S--------QDFGYTIYSTAVAFYIPMSVMLFMYYQIYKAARKSAAKHKF----------
+-------PGF---------------------------------PRVQPESVI--------
+--------------SLNGVVK----------------------LQKEVEECAN-------
+-LSRLLKH-----------------------------ER-KNISIFKREQKAATTLGIIV
+GAFTVCWLPFFLLSTARPFICGTSCSCIPLWVERTCLWLGYANSLINPFIYAFFNRDLRT
+TYRSLLQ--------CQYRNIN----RKLSAAGMHEA-------------------LKLA
+----------------------------------------------------------ER
+PE------RSEFVLQNSDHCGKKGHDT
> 32== M83181 1 human serotonin receptor <>[JBC267(11),7553-7562'92]
-M-DVLSPGQ-----------------GNN-----------------TTSPPAPFETGG--
-------------------------------NTTG--ISDVTV------------------
-------------------------------------------------------------
---SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L
-PMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVN
-KRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTISKDH--------
---GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK----------------
------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRNWRL
-GVESKAGGALCANGAVRQGDDG----AALEVIEVHRVGNSKEHLPLPSEAGPTPCAPASF
-ERKN----ERN--------------------A--------EA-K-RKMALARERKTVKTL
-GIIMGTFILCWLPFFIVALVLPF-CES-SC-HMPTLLGAIINWLGYSNSLLNPVIYAYFN
-KDFQNAFK-KII---------------KCKFCRQ--------------------------
-------------------------------------------------------------
-----------------------------------------------------
+M-DVLSPGQ--------GNNTTSPPAPFETGGNTTGI-----------------------
+--SDVTVSY---------------------------------------------------
+--------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVT
+DLMVSVLV-LPMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYW
+AITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTI
+S--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK-------------
+--------------------------TVKKVEKTGADTRHGASPAPQPKKSVNGESGSRN
+WRLGVESKAGGAL-CANGAVRQGDDGAALEVIEVHRVGNSKEHLPLPSEAGPTPCAP---
+--ASFERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIM
+GTFILCWLPFFIVALVLPF-CESSC-HMPTLLGAIINWLGYSNSLLNPVIYAYFNKDFQN
+AFKKIIK--------CKFCR----------------------------------------
+------------------------------------------------------------
+--------------------------Q
> 33=p A35181 serotonin receptor class 1A - rat
-M-DVFSFGQ-----------------GNN-----------------TTASQEPFGTGG--
-------------------------------NVTS--ISDVTF------------------
-------------------------------------------------------------
---SYQVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLV-L
-PMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVN
-KRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP---EDRSDPDACTISKDH--------
---GYTIYSTFGAFYIPLLLMLVLYGRIF-------RAARFRIRK----------------
------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGDWRR
-CAENRAVGTPCTNGAVRQGDDE----ATLEVIEVHRVGNSKEHLPLPSESGSNSYAPACL
-ERKN----ERN--------------------A--------EA-K-RKMALARERKTVKTL
-GIIMGTFILCWLPFFIVALVLPF-CES-SC-HMPALLGAIINWLGYSNSLLNPVIYAYFN
-KDFQNAFK-KII---------------KCKFCRR--------------------------
-------------------------------------------------------------
-----------------------------------------------------
+M-DVFSFGQ--------GNNTTASQEPFGTGGNVTSI-----------------------
+--SDVTFSY---------------------------------------------------
+--------------QVITSLLLGTLIFCAVLGNACVVAAIALERSLQNVANYLIGSLAVT
+DLMVSVLV-LPMAALYQ-VL-NKWT--LGQVTCDLFIALDVLCCTSSILHLCAIALDRYW
+AITDPIDYVNKRTPRRAAALISLTWLIGFLI-SIPPMLG-WRTP--EDRSDPD---ACTI
+S--------KDHGYTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK-------------
+--------------------------TVRKVEKKGAGTSLGTSSAPPPKKSLNGQPGSGD
+WRRCAENRAVGTP-CTNGAVRQGDDEATLEVIEVHRVGNSKEHLPLPSESGSNSYAP---
+--ACLERK-----------NERNA-------------EA-KRKMALARERKTVKTLGIIM
+GTFILCWLPFFIVALVLPF-CESSC-HMPALLGAIINWLGYSNSLLNPVIYAYFNKDFQN
+AFKKIIK--------CKFCR----------------------------------------
+------------------------------------------------------------
+--------------------------R
> 34== L06803 1 Lymnaea stagnalis serotonin receptor <>[PNAS90,11-15'93]
-M-ANFTFGDLALDVARMGGLASTPSGLRS-----------------TGLTTPGLSPTG--
-------------------------------LVTS--DFNDSYGLTGQF-IN-GSHS----
------------------SRSRDNAS-ANDT-------------SATNMTDDRYWS-LTVY
-SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M
-PLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIR
-RRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------
---GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETT
-LVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK-------------
----NRAKKLPENANGVNSNSSS------------------SERLKQIQIETAEAFANGCA
-EEASIAMLERQ-CNNGKKISSNDTPYS-------------RT-R-EKLELKRERKAARTL
-AIITGAFLICWLPFFIIALIGPF-VDP-E--GIPPFARSFVLWLGYFNSLLNPIIYTIFS
-PEFRSAFQ-KIL----------FG---KYRRGHR--------------------------
-------------------------------------------------------------
-----------------------------------------------------
+M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L
+VTSDFNDSYGLTG-----QFI--NGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVY---
+----------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVA
+DLMVAVLV-MPLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYW
+AVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCII
+S--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE
+ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK----------
+------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS
+-IAMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIIT
+GAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRS
+AFQKILF--------GKYRRG---------------------------------------
+------------------------------------------------------------
+-------------------------HR
> 35=p A47174 serotonin receptor, 5HTlym receptor - great pond snail
-M-ANFTFGDLALDVARMGGLASTPSGLRS-----------------TGLTTPGLSPTG--
-------------------------------LVTS--DFNDSYGLTGQF-IN-GSHS----
------------------SRSRDNAS-ANDT-------------SATNMTDDRYWS-LTVY
-SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVADLMVAVLV-M
-PLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYWAVTS-IDYIR
-RRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NNDPDKTGTCIISQDK--------
---GYTIFSTVGAFYLPMLVMMIIYIRIW-------LVARSRIRKDKFQMTKARLKTEETT
-LVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK-------------
----NRAKKLPENANGVNSNSSS------------------SERLKQIQIETAEAFANGCA
-EEASIAMLERQ-CNNGKKISSNDTPYS-------------RT-R-EKLELKRERKAARTL
-AIITGAFLICWLPFFIIALIGPF-VDP-E--GIPPFARSFVLWLGYFNSLLNPIIYTIFS
-PEFRSAFQ-KIL----------FG---KYRRGHR--------------------------
-------------------------------------------------------------
-----------------------------------------------------
+M-ANFTFGDLALDVARMGGLASTPSGLRSTGLTTPGLSPTG------------------L
+VTSDFNDSYGLTG-----QFI--NGSHSSRSRDNASAN-DTSATNMTDDRYWSLTVY---
+----------SHEHLVLTSVILGLFVLCCIIGNCFVIAAVMLERSLHNVANYLILSLAVA
+DLMVAVLV-MPLSVVSE-IS-KVWF--LHSEVCDMWISVDVLCCTASILHLVAIAMDRYW
+AVTS-IDYIRRRSARRILLMIMVVWIVALFI-SIPPLFG-WRDP--NN--DPDKTGTCII
+S--------QDKGYTIFSTVGAFYLPMLVMMIIYIRIWLVARSRIRKDKFQMTKARLKTE
+ETTLVASPKTEYSVVSDCNGCNSPDSTTEKKKRRAPFKSYGCSPRPERKK----------
+------NRAKKLPENANGVNSNSSSSERLKQIQIE-----------TAEAFANGCAEEAS
+-IAMLERQ-CNNGKKISSNDTPYS-------------RT-REKLELKRERKAARTLAIIT
+GAFLICWLPFFIIALIGPF-VDPE--GIPPFARSFVLWLGYFNSLLNPIIYTIFSPEFRS
+AFQKILF--------GKYRRG---------------------------------------
+------------------------------------------------------------
+-------------------------HR
> 36== X95604 1 Bombyx mori serotonin receptor <antennae>[InsectBiochem.Mol.Bi
-M-EGAE-GQEELDWEAL--YLRLP--LQN-----------------CSWNSTGWEPNW--
-------------------------------NVTV--VPNTTW-----------WQA----
------------------SAPFDTPA-ALVR-------------AAAK-------------
---------AVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVADLLVACLV-M
-PLGAVYE-VV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYWAVTN-IDYIH
-ASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDLRCVVSQDV--------
---GYQIFATASSFYVPVLIILILYWRIY-------QTARKRIR-----------------
-----------------------------RRRGATARGGVGPPP-----------------
---------VPAGGALVAGGGSGGIAAAVVAVI--------GRPLPTISETTTTGFTNVSS
-NNTS---PEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKP-K-EAADSKRERKAAKTL
-AIITGAFVACWLPFFVLAILVPT-CDC-E---VSPVLTSLSLWLGYFNSTLNPVIYTVFS
-PEFRHAFQ-RLL----------CGRRVRRRRAPQ--------------------------
-------------------------------------------------------------
-----------------------------------------------------
+M-EGAE-GQEELD----WEALYLRLPLQNCSWNSTGWEPNW------------------N
+VTVVPNTTW-----------------------WQASAPFDTPAALVR-------------
+--------------AAAKAVVLGLLILATVVGNVFVIAAILLERHLRSAANNLILSLAVA
+DLLVACLV-MPLGAVYE-VV-QRWT--LGPELCDMWTSGDVLCCTASILHLVAIALDRYW
+AVTN-IDYIHASTAKRVGMMIACVWTVSFFV-CIAQLLG-WKDPDWNQRVSEDL--RCVV
+S--------QDVGYQIFATASSFYVPVLIILILYWRIYQTARKRIRR-------------
+--------------------------------RRGATARGGVGPPPVP------------
+---------------AGGALVAGGGSGGIAAAVVAVIGRP---LPTISETTTTGFTNVSS
+NNTSPEKQSCANGLEADPPTTGYGAVAAAYYPSLVRRKP-KEAADSKRERKAAKTLAIIT
+GAFVACWLPFFVLAILVPT-CDCE---VSPVLTSLSLWLGYFNSTLNPVIYTVFSPEFRH
+AFQRLLC--------GRRVRRR----R---------------------------------
+------------------------------------------------------------
+------------------------APQ
>AJ006331.1_1230
-c---------------------------------------------------cauggcgu
+c--------------------------------ca-------------------uggcgu
uaguaugagugucgugcagccuccaggccccccccucccgggagagccauaguggucugc
ggaaccggugaguacaccggaaucgcuggggugaccggguccuuucuuggaacaacccgc
ucaauacccagaaauuugggcgugcccccgcgagaucacuagccgaguaguguugggucg
--- /dev/null
+mafft sample > x
+diff x sample.fftns2 || exit
+
+mafft --maxiterate 100 sample > x
+diff x sample.fftnsi || exit
+
+mafft --dpparttree sample > x
+diff x sample.dpparttree || exit
+
+mafft --globalpair sample > x
+diff x sample.gins1 || exit
+
+mafft --globalpair --maxiterate 100 sample > x
+diff x sample.ginsi || exit
+
+mafft --allowshift --globalpair --maxiterate 100 sample > x
+diff x sample.ginsi.allowshift || exit
+
+mafft --localpair sample > x
+diff x sample.lins1 || exit
+
+mafft --localpair --maxiterate 100 sample > x
+diff x sample.linsi || exit
+
+mafft --parttree sample > x
+diff x sample.parttree || exit
+
+mafft-qinsi samplerna > x
+diff x samplerna.qinsi || exit
+
+mafft-xinsi samplerna > x
+diff x samplerna.xinsi || exit
+
+rm x