show_default_node_shapes_internal: no
show_default_node_shapes_external: no
show_node_shapes_for_nodes_with_vis_data: yes
-default_node_size: 4
+default_node_size: 7
default_node_shape: rectangle
default_node_fill: solid
pdf_export_line_width: 0.5
# BIONJ:
# -----------------------------------------------------
-our $BIONJ = "";
-our $BIONJ_VERSION = "";
+our $BIONJ = "";
+our $BIONJ_VERSION = "";
# WEIGHBOR:
# -----------------------------------------------------
-our $WEIGHBOR = "";
-our $WEIGHBOR_VERSION = "";
+our $WEIGHBOR = "";
+our $WEIGHBOR_VERSION = "";
# PHYML:
# -----------------------------------------------------
-our $PHYML = $SOFTWARE_DIR."PHYLO/PhyML/PhyML-3.1/PhyML-3.1/PhyML-3.1_linux64";
-our $PHYML_VERSION = "3.1";
+our $PHYML = $SOFTWARE_DIR."PHYLO/PhyML/PhyML-3.1/PhyML-3.1/PhyML-3.1_linux64";
+our $PHYML_VERSION = "3.1";
# RAXML:
# -----------------------------------------------------
-our $RAXML = $SOFTWARE_DIR."PHYLO/RAxML/20161215/standard-RAxML-master/raxmlHPC-AVX";
-our $RAXML_VERSION = "8.2.9";
+our $RAXML = $SOFTWARE_DIR."PHYLO/RAxML/20161215/standard-RAxML-master/raxmlHPC-AVX";
+our $RAXML_VERSION = "8.2.9";
# forester.jar. This jar file is currently available at: https://sites.google.com/site/cmzmasek/home/software/forester
# --------------------------------------------------------------------------------------------------------------------
-our $FORESTER_JAR = "/home/zma/git/forester/forester/java/forester.jar";
+our $FORESTER_JAR = "/home/zma/git/forester/forester/java/forester.jar";
# Tool from forester.jar to transfer support values:
# -------------------------------------------------
-our $SUPPORT_TRANSFER = $JAVA." -cp $FORESTER_JAR org.forester.application.support_transfer";
+our $SUPPORT_TRANSFER = $JAVA." -cp $FORESTER_JAR org.forester.application.support_transfer";
} ## executeProtpars
-
-# "Model of substitution" order for DQO TREE-PUZZLE 5.0:
+# "Model of substitution" order for TREE-PUZZLE 5.2:
+# For amino acids:
# Auto
# m -> Dayhoff (Dayhoff et al. 1978)
# m -> JTT (Jones et al. 1992)
# m -> VT (Mueller-Vingron 2000)
# m -> WAG (Whelan-Goldman 2000)
# m -> Auto
+#
+# For nucleotides:
+# HKY (Hasegawa et al. 1985)
+# m -> TN (Tamura-Nei 1993)
+# m -> GTR (e.g. Lanave et al. 1980)
+# m -> SH (Schoeniger-von Haeseler 1994)
+# m -> HKY (Hasegawa et al. 1985)
+#
# One argument:
-# matrix option: 0 = JTT; 2 = BLOSUM 62; 3 = mtREV24;
-# 5 = VT; 6 = WAG; 7 = auto; PAM otherwise
-# Last modified: 07/07/01
+# matrix option:
+# 0 = JTT
+# 2 = BLOSUM 62
+# 3 = mtREV24
+# 5 = VT
+# 6 = WAG
+# 7 = auto
+# 9 = HKY [na]
+# 10 = TN [na]
+# 11 = GTR [na]
+# 12 = SH [na]
+# PAM otherwise
+# Last modified: 2017/04/26
sub setModelForPuzzle {
my $matrix_option = $_[ 0 ];
my $matr = "";
- if ( $matrix_option == 0 ) { # JTT
+ if ( $matrix_option == 0 || $matrix_option == 11 ) { # JTT or GTR
$matr = "
m
m";
m
m";
}
- elsif ( $matrix_option == 3 ) { # mtREV24
+ elsif ( $matrix_option == 3 || $matrix_option == 12) { # mtREV24 or SH
$matr = "
m
m
m
m";
}
- elsif ( $matrix_option == 7 ) { # auto
+ elsif ( $matrix_option == 7 || $matrix_option == 9 ) { # auto or HKY
$matr = "";
- }
+ }
+ elsif ( $matrix_option == 10 ) { # TN
+ $matr = "
+m"
+ }
else { # PAM
$matr = "
m"
use forester;
my $VERSION = "1.0.1";
-my $LAST_MODIFIED = "2017/02/07";
+my $LAST_MODIFIED = "2017/04/26";
my $RAXML_MODEL_BASE = "PROTGAMMA";
my $RAXML_ALGORITHM = "a";
# 1 = PAM
# 2 = BLOSUM 62
# 3 = mtREV24
- # 5 = VT - default
+ # 5 = VT
# 6 = WAG
# 7 = auto in puzzle
# 8 = DCMut in PHYML, VT in TREE-PUZZLE
+ # 9 = HKY [na]
+ # 10 = TN [na]
+ # 11 = GTR [na]
+ # 12 = SH [na]
my $rate_heterogeneity = 0; # 0 = Uniform rate (default)
# 1 = 8 Gamma distributed rates
# 2 = Two rates (1 invariable + 1 variable)
if ( $options =~ /D/ ) {
$matrix = 8; # DCMut in PHYML and RAXML, VT in PUZZLE
}
+ if ( $options =~ /H/ ) {
+ $matrix = 9; # HKY
+ }
+ if ( $options =~ /T/ ) {
+ $matrix = 10; # TN
+ }
+ if ( $options =~ /Z/ ) {
+ $matrix = 11; # GTR
+ }
+ if ( $options =~ /C/ ) {
+ $matrix = 12; # SH
+ }
if ( $options =~ /S(\d+)/ ) {
$seed = $1;
}
elsif ( $matrix == 8 ) {
$log = $log."DCMut (Kosiol and Goldman, 2005) in PHYML and RAxML, VT in TREE-PUZZLE\n";
}
+
+elsif ( $matrix == 9 ) {
+ $log = $log."HKY (Hasegawa et al. 1985) in TREE-PUZZLE\n";
+}
+elsif ( $matrix == 10 ) {
+ $log = $log."TN (Tamura-Nei 1993) in TREE-PUZZLE\n";
+}
+elsif ( $matrix == 11 ) {
+ $log = $log."GTR (e.g. Lanave et al. 1980) in TREE-PUZZLE\n";
+}
+elsif ( $matrix == 12 ) {
+ $log = $log."SH (Schoeniger-von Haeseler 1994) in TREE-PUZZLE\n";
+}
+
+
else {
&dieWithUnexpectedError( "Unknown model: matrix=$matrix" );
}
W : Use WAG matrix (Whelan-Goldman 2000) in TREE-PUZZLE and/or PHYML, RAXML, default: VT.
P : Use PAM matrix (Dayhoff et al. 1978) in TREE-PUZZLE and/or PHYML, RAXML, default: VT.
D : Use DCMut matrix (Kosiol and Goldman, 2005) in PHYML, RAXML, VT in TREE-PUZZLE.
- A : Let TREE-PUZZLE choose which matrix to use, default: VT
+ A : Let TREE-PUZZLE choose which matrix to use, default: VT.
+ H : Use HKY (Hasegawa et al. 1985) in TREE-PUZZLE [for nucleic acids].
+ T : Use TN (Tamura-Nei 1993) in TREE-PUZZLE [for nucleic acids].
+ Z : Use GTR (e.g. Lanave et al. 1980) in TREE-PUZZLE [for nucleic acids].
+ C : Use SH (Schoeniger-von Haeseler 1994) in TREE-PUZZLE [for nucleic acids].
E : Exact parameter estimates in TREE-PUZZLE, default: Approximate.
Model of rate heterogeneity in TREE-PUZZLE (default: Uniform rate):
g : 8 Gamma distributed rates
--- /dev/null
+16 76
+0%1061 DKQQLAIKVTCNAVYGFTGVASGLLPCLKIAETVTLQGRTMLERTKHYVESLQPVDLERICQRPIAV-RADPSLRV
+1%1061 DKQQLAIKVTCNSVYGFTGVASGILPCIPIAETVTLQGRTMLEKSKAFVEMITPERLSDIV-SYPVPDP-DASFRV
+2%1061 DKQQLAIKVTCNSVYGFTGVASGMLPCLMIAETVTLQGRTMLEKTKQFVENLDVQSLQQICPTQTLKQHPTPRFTV
+3%1061 DKQQLAIKVTCNAVYGFTGVASGMLPCLKIAETITMQGRAMLEKTKVFVENLSHEDLRSICKVGSIPNVFDK----
+4%1061 DKQQLAIKVTCNSVYGFTGVASGLLPCVTIAETITLQGRTMLERCKRFVEAISPEYLSNIV-SAPFTEP-NARFRV
+5%1061 DKQQLAIKVTCNSVYGFTGVASGLLPCVTIAETITLQGRTMLERCKRFVEAISPEYLSNIV-SSPFTKP-NARFRV
+6%1061 DKQQLAIKVTCNAVYGFTGVASGMLPCLKIAETITMQGRAMLEKTKVFVENLSHEDLHSICKVGFMPNSIDKPFKV
+7%1061 DKQQLAIKVTCNAVYGFTGVASGLLPCVTIAETVTLQGRNMLEKSKQFIEAITPEKLMTLV-SEPFQXP-DARFRV
+8%1061 DKQQLAIKVTCNAVYGFTGVASGILPCLKIAETVTFQGRRMLEKSKEYIESLTPEMLSHII-QGPVNSP-GASFRV
+9%1061 DKQQLAIKVTCNAVYGFTGVASGILPCLNIAETVTLQGRKMLETSQAFVEGISPTALADLL-QRPIESP-EARFKV
+a%1061 DKQQLAIKVTCNAVYGFTGVASGLLPCINIAETVTLRGRTMLEMSKSYVEALTVSDLRERL-GREVTGP-DAKFRV
+b%1061 DKQQLAIKVTCNAVYGFTGVASGLLPCINIAETVTLRGRTMLEMSKSYVEALTTDDLRTRL-GRGVTGH-GARFRV
+c%1061 DKQQLAIKVTCNAVYGFTGVASGLLPCINIAETVTLRGRTMLEMSKSYVEALTTEDLRTRL-GREVTRH-GARFRV
+d%1061 DKQQLAIKVTCNAVYGFTGVASGILPCLNIAETVTLQGRKMLERSQAFVEAISPERLVGLL-RKPINST-GARFKV
+e%1061 DKQQLAIKVTCNAVYGFTGVASGILPCLNIAETVTLQGRKMLERSQAFVEAISPERLVGLL-RKPINST-DARFKV
+f%1061 DKQQLAIKVTCNSVYGFTGVASGILPCLNIAETVTLQGRRMLEMSQSFVEAISPERLSVLL-HRPIEHP-NARFKV
--- /dev/null
+11 201
+0 ATGAGCACGAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGTCGCCCACAGGACGTCAAGTTCCCGGGTGGCGGTCAGATCGTTGGTGGAGTTTACTTGTTGCCGCGCAGGGGCCCTAGATTGGGTGTGCGCGCGACGAGGAAGACTTCCGAGCGGTCGCAACCTCGAGGTAGACGTCAGCCTATCCCCAAG
+1 ATGAGCACGAATCCTAAACCTCAAAAAAAAAACAAACGTAACACCAACCGTCGCCCACAGGACGTCAAGTTCCCGGGTGGCGGTCAGATCGTTGGTGGAGTTTACTTGTTGCCGCGCAGGGGCCCTAGATTGGGTGTGCGCGCGACGAGAAAGACTTCCGAGCGGTCGCAACCTCGAGGTAGACGTCAGCCTATCCCCAAG
+2 ATGAGCACGAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGTCGCCCACAGGACGTCAAGTTCCCGGGTGGCGGTCAGATCGTTGGTGGAGTTTACTTGTTGCCGCGCAGGGGCCCTAGATTGGGTGTGCGCGCGACGAGGAAGACTTCCGAGCGGTCGCAACCTCGAGGTAGACGTCAGCCTATCCCCAAG
+3 ATGAGCACGAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGTCGCCCACAGGACGTTAAGTTCCCGGGTGGCGGTCAGATCGTTGGTGGAGTTTACTTGTTGCCGCGCAGGGGCCCTAGATTGGGTGTGCGCGCGACGAGGAAGACTTCCGAGCGGTCGCAACCTCGAGGTAGACGTCAGCCTATCCCCAAG
+4 ATGAGCACGAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGTCGCCCACAGGACGTCAAGTTCCCGGGTGGCGGTCAGATCGTTGGTGGAGTTTACTTGTTGCCGCGCAGGGGCCCTAGATTGGGTGTGCGCGCGACGAGGAAGACTTCCGAGCGGTCGCAACCTCGAGGTAGACGTCAGCCTATCCCCAAG
+5 ATGAGCACAAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGCCGCCCACAGGACGTTAAGTTCCCGGGCGGTGGTCAGATCGTTGGTGGAGTTTACCTGTTGCCGCGCAGGGGCCCCAGGTTGGGTGTGCGCGCGACTAGGAAGACTTCCGAGCGGTCGCAACCTCGTGGAAGGCGACAACCTATCCCCAAG
+6 ATGAGCACGAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGCCGCCCACAGGACGTCAAGTTCCCGGGCGGTGGTCAGATCGTTGGTGGAGTTTACCTGTTGCCGCGCAGGGGCCCCAGGTTGGGTGTGCGCGCGCCCAGGAAGACTTCCGAGCGGTCGCAACCTCGTGGAAGGCGACAACCTATCCCCAAG
+7 ATGAGCACGAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGCCGCCCACAGGACGTTAAGTTCCCGGGTGGCGGCCAGATCGTTGGCGGAGTTTACTTGTTGCCGCGCAGGGGCCCCAGAGTGGGTGTGCGCGCGACGAGGAAGACTTCCGAGCGGTCGCAACCTCGCGGGAGGCGTCAGCCTATTCCCAAG
+8 ATGAGCACGAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGACGCCCACAGAACGTTAAGTTCCCGGGTGGCGGCCAGATCGTTGGCGGAGTTTGCTTGTTGCCGCGCAGGGGTCCCAGAGTGGGTGTGCGCGCGACGAGGAAGACTTCCGAGCGGTCACAACCTCGCGGAAGGCGTCAGCCTATTCCCAAG
+9 ATGAGCACGAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGTCGCCCACAGGACGTCAAGTTCCCGGGTGGCGGACAGATCGTTGGTGGAGTTTACTTGTTGCCGCGCAGGGGCCCTAGATTGGGTGTGCGCGCGACGAGGAAGACTTCCGAGCGGTCGCAACCTCGAGGTAGACGTCAGCCTATCCCCAAG
+10 ATGAGCACAAATCCTAAACCTCAAAGAAAAACCAAACGTAACACCAACCGCCGCCCACAGGACGTCAAGTTCCCGGGTGGTGGTCAGATCGTTGGTGGAGTTTACCTGTTGCCGCGCAGGGGCCCCAGGTTGGGTGTGCGCGCGACTAGGAAGACTTCCGAGCGGTCGCAACCTCGTGGCAGGCGACAGCCTATTCCCAAG
require 'lib/evo/io/parser/msa_parser'
require 'lib/evo/msa/msa'
-#require 'iconv'
-
module Evoruby
+ class GeneralMsaParser < MsaParser
+ def initialize
+ end
- class GeneralMsaParser < MsaParser
-
- def initialize
- end
+ def parse( path )
+ Util.check_file_for_readability( path )
+ block = -1
+ current_seq_index_per_block = -1
+ current_name = nil
+ saw_ignorable = true
+ is_first = true
+ msa = Msa.new
- def parse( path )
- Util.check_file_for_readability( path )
- block = -1
- current_seq_index_per_block = -1
- current_name = nil
+ File.open( path ) do | file |
+ while line = file.gets
+ line.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "?")
+ if can_ignore?( line )
saw_ignorable = true
- is_first = true
- msa = Msa.new
- ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' )
- File.open( path ) do | file |
- while line = file.gets
- line = ic.iconv( line )
- if can_ignore?( line )
- saw_ignorable = true
- elsif ( is_first && is_program_name_line?( line ) )
- elsif( line =~ /^\S+\s+.+\s*$/ || line =~ /^\s+.+\s*$/ || line =~ /^\S+\s*$/ )
- if ( saw_ignorable )
- block += 1
- current_seq_index_per_block = -1
- saw_ignorable = false
- end
- current_seq_index_per_block += 1
- if ( line =~ /^(\S+)\s+(.+?)\s*$/ )
- name = $1
- seq = $2.gsub( /\s/, '.' )
- a = msa.find_by_name( name, false, false )
- if ( a.length < 1 )
- msa.add( name, seq )
- elsif ( a.length == 1 )
- msa.get_sequence( a[ 0 ] ).append!( seq )
- else
- error_msg = "Unexpected error at line: " + line
- raise IOError, error_msg
- end
- current_name = name
- elsif ( line =~ /^\s+(.+?)\s*$/ )
- seq = $1.gsub( /\s/, '.' )
- a = msa.find_by_name( current_name, false, false )
- if ( a.length != 1 )
- error_msg = "Unexpected error at line: " + line
- raise IOError, error_msg
- else
- msa.get_sequence( a[ 0 ] ).append!( seq )
- end
+ elsif ( is_first && is_program_name_line?( line ) )
+ elsif( line =~ /^\S+\s+.+\s*$/ || line =~ /^\s+.+\s*$/ || line =~ /^\S+\s*$/ )
+ if ( saw_ignorable )
+ block += 1
+ current_seq_index_per_block = -1
+ saw_ignorable = false
+ end
+ current_seq_index_per_block += 1
+ if ( line =~ /^(\S+)\s+(.+?)\s*$/ )
+ name = $1
+ seq = $2.gsub( /\s/, '.' )
+ a = msa.find_by_name( name, false, false )
+ if ( a.length < 1 )
+ msa.add( name, seq )
+ elsif ( a.length == 1 )
+ msa.get_sequence( a[ 0 ] ).append!( seq )
+ else
+ error_msg = "Unexpected error at line: " + line
+ raise IOError, error_msg
+ end
+ current_name = name
+ elsif ( line =~ /^\s+(.+?)\s*$/ )
+ seq = $1.gsub( /\s/, '.' )
+ a = msa.find_by_name( current_name, false, false )
+ if ( a.length != 1 )
+ error_msg = "Unexpected error at line: " + line
+ raise IOError, error_msg
+ else
+ msa.get_sequence( a[ 0 ] ).append!( seq )
+ end
- elsif ( line =~ /^(\S+)\s*$/ )
- seq = $1
- if block == 0
- error_msg = "First block cannot contain unnamed sequences"
- raise IOError, error_msg
- else
- msa.get_sequence( current_seq_index_per_block ).append!( seq )
- end
- current_name = nil
- end
- else
- error_msg = "Unexpected line: " + line
- raise IOError, error_msg
- end
- if ( is_first )
- is_first = false
- end
- end
+ elsif ( line =~ /^(\S+)\s*$/ )
+ seq = $1
+ if block == 0
+ error_msg = "First block cannot contain unnamed sequences"
+ raise IOError, error_msg
+ else
+ msa.get_sequence( current_seq_index_per_block ).append!( seq )
+ end
+ current_name = nil
end
- return msa
- end # def parse( path )
+ else
+ error_msg = "Unexpected line: " + line
+ raise IOError, error_msg
+ end
+ if ( is_first )
+ is_first = false
+ end
+ end
+ end
+ return msa
+ end # def parse( path )
- private
+ private
- def can_ignore?( line )
- return ( line !~ /[A-Za-z\-?\*_\.]/ ||
- line =~ /^\s+[*\.:]/ ||
- line =~ /^\s*#/ ||
- line =~ /^\s*%/ ||
- line =~ /^\s*\/\// ||
- line =~ /^\s*!!/ )
- end
-
- def is_program_name_line?( line )
- return ( line =~ /^CLUSTAL\s/ ||
- line =~ /^MUSCLE\s\(/ ||
- line =~ /^PROBCONS\s/ )
- end
- end # class GeneralMsaParser
+ def can_ignore?( line )
+ return ( line !~ /[A-Za-z\-?\*_\.]/ ||
+ line =~ /^\s+[*\.:]/ ||
+ line =~ /^\s*#/ ||
+ line =~ /^\s*%/ ||
+ line =~ /^\s*\/\// ||
+ line =~ /^\s*!!/ )
+ end
+
+ def is_program_name_line?( line )
+ return ( line =~ /^CLUSTAL\s/ ||
+ line =~ /^MUSCLE\s\(/ ||
+ line =~ /^PROBCONS\s/ )
+ end
+ end # class GeneralMsaParser
end # module Evoruby
w.clean( @clean )
w.set_max_name_length( @name_length )
end
- msa_hash.each do |os, msa|
+ msa_hash.each do |os, m|
my_os = os.gsub(' ', '_').gsub('/', '_').gsub('(', '_').gsub(')', '_')
- io.write_to_file( msa, output + '_' + my_os, w )
+ io.write_to_file( m, output + '_' + my_os, w )
end
Util.print_message( PRG_NAME, "wrote " + msa_hash.length.to_s + " files" )
rescue Exception => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
end
-
+
elsif ( @split > 0 )
begin
msas = msa.split( @split, true )
then
name=${BASH_REMATCH[1]}
echo $name
- perl /home/zma/git/forester/forester/perl/phylo_pl.pl -B1000Wq@1S9 $i ${name}_mafft_1000_g_05_20_tree
+ perl /home/zma/git/forester/forester/perl/phylo_pl.pl -B1000Wq@1S9X $i ${name}_mafft_1000_g_05_20_tree
rc=$?
if [[ $rc != 0 ]]
then