return _gui_menu_text_color;
}
+ /**
+ * Returns the font size used when constructing the jcb_bold_font, jcb_font
+ * and js_font Font constants; currently a fixed value of 11.
+ */
+ static int getGuiFontSize() {
+ return 11;
+ }
+
int getMaxBaseFontSize() {
return _max_base_font_size;
}
ORDER_SUBTREE;
}
final static Font jcb_bold_font = new Font( Configuration
- .getDefaultFontFamilyName(), Font.BOLD, 9 );
+ .getDefaultFontFamilyName(), Font.BOLD, Configuration.getGuiFontSize() );
final static Font jcb_font = new Font( Configuration
- .getDefaultFontFamilyName(), Font.PLAIN, 9 );
+ .getDefaultFontFamilyName(), Font.PLAIN, Configuration.getGuiFontSize());
final static Font js_font = new Font( Configuration
- .getDefaultFontFamilyName(), Font.PLAIN, 9 );
+ .getDefaultFontFamilyName(), Font.PLAIN, Configuration.getGuiFontSize() );
private static final String RETURN_TO_SUPER_TREE_TEXT = "R";
private static final String SEARCH_TIP_TEXT = "Enter text to search for. Use ',' for logical OR and '+' for logical AND (not used in this manner for regular expression searches).";
private static final long serialVersionUID = -8463483932821545633L;
public final class MainFrameApplication extends MainFrame {
- private final static int FRAME_X_SIZE = 800;
- private final static int FRAME_Y_SIZE = 800;
+ private final static int FRAME_X_SIZE = 900;
+ private final static int FRAME_Y_SIZE = 900;
// Filters for the file-open dialog (classes defined in this file)
private static final long serialVersionUID = -799735726778865234L;
private static final boolean PREPROCESS_TREES = false;
n.getNodeData().getTaxonomy().setIdentifier( new Identifier( name ) );
break;
}
+ case CLADE_NAME:
+ n.setName( name );
+ break;
default: {
throw new IllegalArgumentException( "don't know what to do with " + field );
}
# Software directory:
# ---------------------
-our $SOFTWARE_DIR = "/home/czmasek/SOFTWARE/";
+our $SOFTWARE_DIR = "/home/zma/SOFTWARE/";
# Java virtual machine:
# ---------------------
-our $JAVA = $SOFTWARE_DIR."JAVA/jdk1.6.0_03/bin/java";
+our $JAVA = "java";
# Where all the temporary files can be created:
# Programs from Joe Felsenstein's PHYLIP package:
# -----------------------------------------------
-our $SEQBOOT = $SOFTWARE_DIR."PHYLIP/phylip-3.68/src/seqboot";
-our $NEIGHBOR = $SOFTWARE_DIR."PHYLIP/phylip-3.68/src/neighbor";
-our $PROTPARS = $SOFTWARE_DIR."PHYLIP/phylip-3.68/src/protpars";
-our $PROML = $SOFTWARE_DIR."PHYLIP/phylip-3.68/src/proml";
-our $FITCH = $SOFTWARE_DIR."PHYLIP/phylip-3.68/src/fitch";
-our $CONSENSE = $SOFTWARE_DIR."PHYLIP/phylip-3.68/src/consense";
-our $PHYLIP_VERSION = "3.68";
+# NOTE(review): the executable paths below point into a "phylip-3.696" directory
+# while $PHYLIP_VERSION reports "3.695" -- confirm which release is installed.
+our $SEQBOOT = $SOFTWARE_DIR."PHYLO/Phylip/Phylip3.695/phylip-3.696/exe/seqboot";
+our $NEIGHBOR = $SOFTWARE_DIR."PHYLO/Phylip/Phylip3.695/phylip-3.696/exe/neighbor";
+our $PROTPARS = $SOFTWARE_DIR."PHYLO/Phylip/Phylip3.695/phylip-3.696/exe/protpars";
+our $PROML = $SOFTWARE_DIR."PHYLO/Phylip/Phylip3.695/phylip-3.696/exe/proml";
+our $FITCH = $SOFTWARE_DIR."PHYLO/Phylip/Phylip3.695/phylip-3.696/exe/fitch";
+our $CONSENSE = $SOFTWARE_DIR."PHYLO/Phylip/Phylip3.695/phylip-3.696/exe/consense";
+our $PHYLIP_VERSION = "3.695";
# TREE-PUZZLE:
# ------------
-our $PUZZLE = $SOFTWARE_DIR."TREE_PUZZLE/tree-puzzle-5.2/src/puzzle";
+our $PUZZLE = $SOFTWARE_DIR."PHYLO/TREE-PUZZLE/tree-puzzle-5.2/src/puzzle";
our $PUZZLE_VERSION = "5.2";
# FASTME:
# -----------------------------------------------------
-our $FASTME = $SOFTWARE_DIR."FASTME/fastme2.0/fastme";
+our $FASTME = $SOFTWARE_DIR."PHYLO/FastME/fastme2.0/fastme";
our $FASTME_VERSION = "2.0";
# BIONJ:
# -----------------------------------------------------
-our $BIONJ = $SOFTWARE_DIR."BIONJ/bionj";
-our $BIONJ_VERSION = "[1997]";
+our $BIONJ = "";
+our $BIONJ_VERSION = "";
# WEIGHBOR:
# -----------------------------------------------------
-our $WEIGHBOR = $SOFTWARE_DIR."WEIGHBOR/Weighbor/weighbor";
-our $WEIGHBOR_VERSION = "1.2.1";
+our $WEIGHBOR = "";
+our $WEIGHBOR_VERSION = "";
# PHYML:
# -----------------------------------------------------
-our $PHYML = $SOFTWARE_DIR."PHYML/phyml_v2.4.4/exe/phyml_linux";
-our $PHYML_VERSION = "2.4.4";
+our $PHYML = $SOFTWARE_DIR."PHYLO/PhyML/PhyML-3.1/PhyML-3.1/PhyML-3.1_linux64";
+our $PHYML_VERSION = "3.1";
# RAXML:
# -----------------------------------------------------
-our $RAXML = $SOFTWARE_DIR."RAXML/RAxML-7.0.4/raxmlHPC";
-our $RAXML_VERSION = "7.0.4";
+our $RAXML = $SOFTWARE_DIR."PHYLO/RAxML/20161215/standard-RAxML-master/raxmlHPC-AVX";
+our $RAXML_VERSION = "8.2.9";
-# forester.jar. This jar file is currently available at: http://www.phylosoft.org
-# -------------------------------------------------------------------------------
+# forester.jar. This jar file is currently available at: https://sites.google.com/site/cmzmasek/home/software/forester
+# --------------------------------------------------------------------------------------------------------------------
-our $FORESTER_JAR = $SOFTWARE_DIR."FORESTER/DEV/forester/forester/java/forester.jar";
+our $FORESTER_JAR = "/home/zma/git/forester/forester/java/forester.jar";
&testForTextFilePresence( $inpwd );
my $command = "";
- if ( $bs > 0 ) {
+ if ( $bs > 1 ) {
$command = "$FASTME -b $init_opt -i $inpwd -n $bs -s b";
}
else {
#!/usr/bin/perl -W
#
-# $Id: phylo_pl.pl,v 1.32 2010/12/13 19:00:22 cmzmasek Exp $
-#
# FORESTER -- software libraries and applications
# for evolutionary biology research and applications.
#
-# Copyright (C) 2008-2014 Christian M. Zmasek
-# Copyright (C) 2008-2009 Burnham Institute for Medical Research
+# Copyright (C) 2017 Christian M. Zmasek
# All rights reserved
#
# This library is free software; you can redistribute it and/or
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#
-# Contact: phylosoft @ gmail . com
-# WWW: www.phylosoft.org/forester
+# Contact: cmzmasek at yahoo dot com
+# WWW: https://sites.google.com/site/cmzmasek/home/software/forester
#
#
#
use forester;
my $VERSION = "1.0.1";
-my $LAST_MODIFIED = "2009.10.02";
+my $LAST_MODIFIED = "2017/02/07";
my $RAXML_MODEL_BASE = "PROTGAMMA";
my $RAXML_ALGORITHM = "a";
sub printUsage {
print <<END;
-
-Copyright (C) 2002-2007 Christian M. Zmasek
+Copyright (C) 2017 Christian M Zmasek
All rights reserved
-Author: Christian M. Zmasek
-phylosoft\@gmail.com
-http://www.phylosoft.org
+Author: Christian M Zmasek
+cmzmasek at yahoo dot com
+https://sites.google.com/site/cmzmasek/home/software/forester
Requirements phylo_pl is part of the FORESTER collection of programs.
------------ Many of its global variables are set via forester.pm.
#
# = exe/tap
#
-# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
-#
-# last modified: 05/18/2007
+# Copyright:: Copyright (C) 2017 Christian M Zmasek
+# License:: GNU Lesser General Public License (LGPL)
require 'lib/evo/tool/taxonomy_processor'
#
# = lib/evo/io/parser/fasta_parser - FastaParser class
#
-# Copyright:: Copyright (C) 20017 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
#
-# last modified: 05/17/2007
+# Last modified: 2017/02/07
require 'lib/evo/io/parser/msa_parser'
require 'lib/evo/msa/msa'
-#require 'iconv'
-
module Evoruby
class FastaParser < MsaParser
current_seq = String.new()
name = String.new()
saw_first_seq = false
- ic = Iconv.new( 'UTF-8//IGNORE', 'UTF-8' )
File.open( path ) do | file |
while line = file.gets
- line = ic.iconv( line )
+
+ line.encode!("UTF-8", :invalid => :replace, :undef => :replace, :replace => "?")
if can_ignore?( line, saw_first_seq )
elsif line =~ /^\s*>\s*(.+)/
#
# = lib/evo/io/parser/general_msa_parser - GeneralMsaParser class
#
-# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
#
-# last modified: 2009/10/08
+# Last modified: 2017/02/07
require 'lib/evo/io/parser/msa_parser'
require 'lib/evo/msa/msa'
#
-# To change this template, choose Tools | Templates
-# and open the template in the editor.
-
+# = lib/evo/io/parser/hmmscan_parser.rb - HmmscanParser class
+#
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
+#
+# Last modified: 2017/02/12
class HmmscanParser
-
def initialize file
@file = file
end
r.env_from = $20.to_i
r.env_to = $21.to_i
+ r.desc = $23
+
if r.number > r.out_of || r.hmm_from > r.hmm_to || r.ali_from > r.ali_to || r.env_from > r.env_to
raise IOError, "illogical format: " + line
end
attr_accessor :ali_to
attr_accessor :env_from
attr_accessor :env_to
+ attr_accessor :desc
end
#
# = lib/evo/msa/msa.rb - Msa class
#
-# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
-#
-# $Id: msa.rb,v 1.11 2009/01/03 00:42:08 cmzmasek Exp $
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
#
+# Last modified: 2017/02/07
require 'lib/evo/util/constants'
x = get_number_of_seqs / n
for i in 0 ... n
msa = Msa.new()
- s = 0
-
+ #s = 0
if ( ( r > 0 ) && ( i == ( n - 1 ) ) )
y = x + r
if ( verbose )
#
-# = lib/evo/apps/domain_sequence_extractor.rb - DomainSequenceExtractor class
+# = lib/evo/apps/domain_sequence_extractor.rb - DomainSequenceExtractor class
#
-# Copyright:: Copyright (C) 2012 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
-#
-# $Id:Exp $
-
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
require 'lib/evo/util/constants'
require 'lib/evo/util/util'
require 'lib/evo/io/parser/hmmscan_domain_extractor'
module Evoruby
-
class DomainSequenceExtractor
PRG_NAME = "dsx"
- PRG_VERSION = "2.000"
+ PRG_VERSION = "2.001"
PRG_DESC = "extraction of domain sequences from hmmscan output"
- PRG_DATE = "20121001"
- COPYRIGHT = "2012 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
- WWW = "www.phylosoft.org"
+ PRG_DATE = "20170213"
+ WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
E_VALUE_THRESHOLD_OPTION = 'e'
LENGTH_THRESHOLD_OPTION = 'l'
FAILED_SEQS_SUFFIX = '_with_no_passing_domains.fasta'
HELP_OPTION_1 = 'help'
HELP_OPTION_2 = 'h'
-
def run()
Util.print_program_information( PRG_NAME,
- PRG_VERSION,
- PRG_DESC ,
- PRG_DATE,
- COPYRIGHT,
- CONTACT,
- WWW,
- STDOUT )
+ PRG_VERSION,
+ PRG_DESC ,
+ PRG_DATE,
+ WWW,
+ STDOUT )
ld = Constants::LINE_DELIMITER
end
if ( cla.is_option_set?( HELP_OPTION_1 ) ||
- cla.is_option_set?( HELP_OPTION_2 ) )
+ cla.is_option_set?( HELP_OPTION_2 ) )
print_help
exit( 0 )
end
disallowed = cla.validate_allowed_options_as_str( allowed_opts )
if ( disallowed.length > 0 )
Util.fatal_error( PRG_NAME,
- "unknown option(s): " + disallowed,
- STDOUT )
+ "unknown option(s): " + disallowed,
+ STDOUT )
end
domain_id = cla.get_file_name( 0 )
outfile = outfile[ 0 .. outfile.length - 5 ]
end
-
add_position = false
if ( cla.is_option_set?( ADD_POSITION_OPTION ) )
add_position = true
end
end
-
min_linker = nil
if ( cla.is_option_set?( MIN_LINKER_OPT ) )
begin
end
end
-
log = String.new
puts()
end
-
if ( add_position )
puts( "Add positions (rel to complete seq) to extracted domains: true" )
log << "Add positions (rel to complete seq) to extracted domains: true" + ld
begin
parser = HmmscanDomainExtractor.new()
domain_count = parser.parse( domain_id,
- hmmsearch_output,
- fasta_sequence_file,
- outfile,
- outfile + PASSED_SEQS_SUFFIX,
- outfile + FAILED_SEQS_SUFFIX,
- e_value_threshold,
- length_threshold,
- add_position,
- add_domain_number,
- add_species,
- min_linker,
- log )
+ hmmsearch_output,
+ fasta_sequence_file,
+ outfile,
+ outfile + PASSED_SEQS_SUFFIX,
+ outfile + FAILED_SEQS_SUFFIX,
+ e_value_threshold,
+ length_threshold,
+ add_position,
+ add_domain_number,
+ add_species,
+ min_linker,
+ log )
rescue ArgumentError, IOError => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
#
# = lib/evo/apps/domains_to_forester - DomainsToForester class
#
-# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
-#
-# $Id: Exp $
-#
-# last modified: 06/11/2007
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
require 'lib/evo/util/constants'
require 'lib/evo/util/util'
require 'lib/evo/sequence/domain_structure'
module Evoruby
-
class DomainsToForester
PRG_NAME = "d2f"
- PRG_DESC = "parsed hmmpfam output to forester format"
- PRG_VERSION = "1.001"
- PRG_DATE = "20120807"
- COPYRIGHT = "2012 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
- WWW = "www.phylosoft.org"
+ PRG_DESC = "converting of parsed hmmpfam output to forester format"
+ PRG_VERSION = "1.002"
+ PRG_DATE = "20170213"
+ WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
E_VALUE_THRESHOLD_OPTION = "e"
OVERWRITE_IF_SAME_FROM_TO_OPTION = "o"
HELP_OPTION_1 = "help"
HELP_OPTION_2 = "h"
-
def parse( domains_list_file,
- original_seqs_file,
- outfile,
- column_delimiter,
- e_value_threshold,
- overwrite_if_same_from_to )
+ original_seqs_file,
+ outfile,
+ column_delimiter,
+ e_value_threshold,
+ overwrite_if_same_from_to )
Util.check_file_for_readability( domains_list_file )
Util.check_file_for_readability( original_seqs_file )
Util.check_file_for_writability( outfile )
File.open( domains_list_file ) do | file |
while line = file.gets
if !is_ignorable?( line )
-
+
a = line.split( column_delimiter )
l = a.length
if ( ( l < 4 ) || ( e_value_threshold >= 0.0 && l < 5 ) )
domain_name = a[ 1 ]
seq_from = -1
seq_to = -1
- ##########################################
- if domain_name =~ /RRM_\d/
- puts "ignoring " + line
- next
- end
- ##########################################
-
-
+
begin
seq_from = a[ 2 ].to_i
rescue Exception
end # parse
-
-
-
def run()
Util.print_program_information( PRG_NAME,
- PRG_VERSION,
- PRG_DESC,
- PRG_DATE,
- COPYRIGHT,
- CONTACT,
- WWW,
- STDOUT )
+ PRG_VERSION,
+ PRG_DESC,
+ PRG_DATE,
+ WWW,
+ STDOUT )
+
+ if ( ARGV == nil || ( ARGV.length < 1 ) )
+ print_help
+ exit( -1 )
+ end
begin
cla = CommandLineArguments.new( ARGV )
end
if ( cla.is_option_set?( HELP_OPTION_1 ) ||
- cla.is_option_set?( HELP_OPTION_2 ) )
+ cla.is_option_set?( HELP_OPTION_2 ) )
print_help
exit( 0 )
end
- if cla.get_number_of_files != 3
+ unless ( cla.get_number_of_files == 1 || cla.get_number_of_files == 2 || cla.get_number_of_files == 3 )
print_help
exit( -1 )
end
disallowed = cla.validate_allowed_options_as_str( allowed_opts )
if ( disallowed.length > 0 )
Util.fatal_error( PRG_NAME,
- "unknown option(s): " + disallowed,
- STDOUT )
+ "unknown option(s): " + disallowed,
+ STDOUT )
end
- domains_list_file = cla.get_file_name( 0 )
- original_sequences_file = cla.get_file_name( 1 )
- outfile = cla.get_file_name( 2 )
-
-
e_value_threshold = -1.0
if cla.is_option_set?( E_VALUE_THRESHOLD_OPTION )
begin
Util.fatal_error( PRG_NAME, "attempt to use a negative E-value threshold", STDOUT )
end
end
+
+ domains_list_file = cla.get_file_name( 0 )
+ original_sequences_file = ""
+ outfile = ""
+ if (cla.get_number_of_files == 3)
+ original_sequences_file = cla.get_file_name( 1 )
+ outfile = cla.get_file_name( 2 )
+ elsif (cla.get_number_of_files == 1 || cla.get_number_of_files == 2 )
+ if ( cla.get_number_of_files == 2 )
+ original_sequences_file = cla.get_file_name( 1 )
+ else
+ hmmscan_index = domains_list_file.index("hmmscan")
+ if ( hmmscan_index != nil )
+ prefix = domains_list_file[0 .. hmmscan_index-1 ]
+ suffix = Constants::ID_NORMALIZED_FASTA_FILE_SUFFIX
+ files = Dir.entries( "." )
+ matching_files = Util.get_matching_files( files, prefix, suffix)
+ if matching_files.length < 1
+ Util.fatal_error( PRG_NAME, 'no file matching [' + prefix +
+ '...' + suffix + '] present in current directory: need to indicate <file containing complete sequences in fasta format> as second argument' )
+ end
+ if matching_files.length > 1
+ Util.fatal_error( PRG_NAME, 'more than one file matching [' +
+ prefix + '...' + suffix + '] present in current directory: need to indicate <file containing complete sequences in fasta format> as second argument' )
+ end
+ original_sequences_file = matching_files[ 0 ]
+ end
+ end
+ outfile = domains_list_file
+ if (outfile.end_with?(Constants::DOMAIN_TABLE_SUFFIX) )
+ outfile = outfile.chomp(Constants::DOMAIN_TABLE_SUFFIX)
+ end
+ if ( e_value_threshold >= 0.0 )
+ outfile = outfile + Constants::DOMAINS_TO_FORESTER_EVALUE_CUTOFF_SUFFIX + e_value_threshold.to_s
+ end
+ outfile = outfile + Constants::DOMAINS_TO_FORESTER_OUTFILE_SUFFIX
+ end
+
overwrite_if_same_from_to = false
if ( cla.is_option_set?( OVERWRITE_IF_SAME_FROM_TO_OPTION ) )
overwrite_if_same_from_to = true
end
puts
- puts( "Domains list file : " + domains_list_file )
- puts( "Fasta sequencefile (complete sequences): " + original_sequences_file )
- puts( "Outputfile : " + outfile )
+ puts( "Domain table : " + domains_list_file )
+ puts( "Fasta sequence file (complete sequences): " + original_sequences_file )
+ puts( "Outputfile : " + outfile )
if ( e_value_threshold >= 0.0 )
- puts( "E-value threshold : " + e_value_threshold.to_s )
+ puts( "E-value threshold : " + e_value_threshold.to_s )
else
- puts( "E-value threshold : no threshold" )
+ puts( "E-value threshold : no threshold" )
end
if ( overwrite_if_same_from_to )
- puts( "Overwrite if same from and to : true" )
+ puts( "Overwrite if same from and to : true" )
else
- puts( "Overwrite if same from and to : false" )
+ puts( "Overwrite if same from and to : false" )
end
puts
begin
parse( domains_list_file,
- original_sequences_file,
- outfile,
- " ",
- e_value_threshold,
- overwrite_if_same_from_to )
+ original_sequences_file,
+ outfile,
+ " ",
+ e_value_threshold,
+ overwrite_if_same_from_to )
rescue ArgumentError, IOError, StandardError => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
Util.fatal_error( PRG_NAME, "unexpected exception: " + e.to_s, STDOUT )
end
-
puts
+ Util.print_message( PRG_NAME, "wrote: " + outfile )
+ Util.print_message( PRG_NAME, "next steps in standard analysis pipeline: hmmsearch followed by dsx.rb")
Util.print_message( PRG_NAME, 'OK' )
puts
puts
puts( "Usage:" )
puts
- puts( " " + PRG_NAME + ".rb [options] <domains list file (parsed hmmpfam output)> <file containing complete sequences in fasta format> <outputfile>" )
+ puts( " " + PRG_NAME + ".rb [options] <domain table (parsed hmmpfam output)> [file containing complete sequences in fasta format] [outputfile]" )
puts()
puts( " options: -" + E_VALUE_THRESHOLD_OPTION + "=<f> : E-value threshold, default is no threshold" )
puts( " -" + OVERWRITE_IF_SAME_FROM_TO_OPTION + " : overwrite domain with same start and end with domain with better E-value" )
puts
+ puts( "Examples:" )
+ puts
+ puts( " " + PRG_NAME + ".rb P53_hmmscan_#{Constants::PFAM_V_FOR_EX}_10_domain_table P53_ni.fasta P53_hmmscan_300_10.dff" )
+ puts
+ puts( " " + PRG_NAME + ".rb P53_hmmscan_#{Constants::PFAM_V_FOR_EX}_10_domain_table P53_ni.fasta" )
+ puts
+ puts( " " + PRG_NAME + ".rb P53_hmmscan_#{Constants::PFAM_V_FOR_EX}_10_domain_table" )
+ puts()
end
-
-
# Tells whether a line can be skipped. Returns true when the line contains no
# letter, digit or '-'; otherwise returns the result of matching a leading '#'
# (the match position for comment lines, nil for data lines).
def is_ignorable?( line )
  has_content = ( line =~ /[A-Za-z0-9-]/ )
  return true unless has_content
  return line =~ /^\s*#/
end
-
end # class DomainsToForester
-
end # module Evoruby
#
# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
# License:: GNU Lesser General Public License (LGPL)
-#
-# $Id: fasta_taxonomy_processor.rb,v 1.4 2010/12/13 19:00:11 cmzmasek Exp $
-
require 'lib/evo/util/util'
require 'lib/evo/msa/msa_factory'
require 'lib/evo/apps/tseq_taxonomy_processor'
module Evoruby
+ class FastaTaxonomyProcessor
+
+ PRG_NAME = "fasta_tap"
+ PRG_DATE = "2009.01.20"
+ PRG_DESC = "preprocessing of multiple sequence files in ncbi fasta format"
+ PRG_VERSION = "1.00"
+ WWW = "www.phylosoft.org"
+ def initialize()
+ @tax_ids_to_sp_taxonomies = Hash.new()
+ end
- class FastaTaxonomyProcessor
+ def run()
+
+ # Print the program banner. COPYRIGHT and CONTACT are no longer defined in
+ # this class, so pass the same six arguments the other apps (dsx, d2f, hsp) use.
+ Util.print_program_information( PRG_NAME,
+ PRG_VERSION,
+ PRG_DESC,
+ PRG_DATE,
+ WWW,
+ STDOUT )
+
+ if ARGV == nil || ARGV.length != 4
+ puts( "Usage: #{PRG_NAME}.rb <sp taxonomy file> <sequences in ncbi fasta format> <name for fasta outfile> <name for map outfile>" )
+ puts()
+ exit( -1 )
+ end
+
+ begin
+ cla = CommandLineArguments.new( ARGV )
+ rescue ArgumentError => e
+ Util.fatal_error( PRG_NAME, "error: " + e.to_s )
+ end
+ allowed_opts = Array.new
+ disallowed = cla.validate_allowed_options_as_str( allowed_opts )
+ if ( disallowed.length > 0 )
+ Util.fatal_error( PRG_NAME, "unknown option(s): " + disallowed )
+ end
+
+ sp_taxonomy_infile = cla.get_file_name( 0 )
+ sequences_infile = cla.get_file_name( 1 )
+ sequences_outfile = cla.get_file_name( 2 )
+ mapping_outfile = cla.get_file_name( 3 )
+
+ Util.fatal_error_if_not_readable( PRG_NAME, sp_taxonomy_infile )
+ Util.fatal_error_if_not_readable( PRG_NAME, sequences_infile )
+ Util.fatal_error_if_not_writable( PRG_NAME, mapping_outfile )
+ Util.fatal_error_if_not_writable( PRG_NAME, sequences_outfile )
+
+ sp_taxonomies = SpTaxonomyParser.parse( sp_taxonomy_infile )
+
+ Util.print_message( PRG_NAME, "read in taxonomic data for " + sp_taxonomies.size.to_s + " species from: " + sp_taxonomy_infile )
+
+ fasta_parser = FastaParser.new
+ msa_fac = MsaFactory.new
+
+ seqs = msa_fac.create_msa_from_file( sequences_infile, fasta_parser )
+
+ Util.print_message( PRG_NAME, "read in " + seqs.get_number_of_seqs.to_s + " sequences from: " + sequences_infile )
+
+ removed = seqs.remove_redundant_sequences!( true, true )
+
+ if removed.size > 0
+ Util.print_message( PRG_NAME, "going to ignore the following " + removed.size.to_s + " redundant sequences:" )
+ removed.each { | seq_name |
+ puts seq_name
+ }
+ Util.print_message( PRG_NAME, "will process " + seqs.get_number_of_seqs.to_s + " non-redundant sequences" )
+ end
+
+ mapping_out = File.open( mapping_outfile, "a" )
+
+ for i in 0 ... seqs.get_number_of_seqs
+ seq = seqs.get_sequence( i )
+ seq.set_name( Util::normalize_seq_name( modify_name( seq, i, sp_taxonomies, mapping_out ), 10 ) )
+ end
+
+ io = MsaIO.new()
+
+ w = FastaWriter.new()
+
+ w.set_max_name_length( 10 )
+ w.clean( true )
+ begin
+ io.write_to_file( seqs, sequences_outfile, w )
+ rescue Exception => e
+ Util.fatal_error( PRG_NAME, "failed to write file: " + e.to_s )
+ end
+ mapping_out.close()
+
+ Util.print_message( PRG_NAME, "wrote: " + mapping_outfile )
+ Util.print_message( PRG_NAME, "wrote: " + sequences_outfile )
+ Util.print_message( PRG_NAME, "OK" )
- PRG_NAME = "fasta_tap"
- PRG_DATE = "2009.01.20"
- PRG_DESC = "preprocessing of multiple sequence files in ncbi fasta format"
- PRG_VERSION = "1.00"
- COPYRIGHT = "2009 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
- WWW = "www.phylosoft.org"
+ end
- def initialize()
- @tax_ids_to_sp_taxonomies = Hash.new()
- end
+ private
- def run()
-
- Util.print_program_information( PRG_NAME,
- PRG_VERSION,
- PRG_DESC,
- PRG_DATE,
- COPYRIGHT,
- CONTACT,
- WWW,
- STDOUT )
-
- if ARGV == nil || ARGV.length != 4
- puts( "Usage: #{PRG_NAME}.rb <sp taxonomy file> <sequences in ncbi fasta format> <name for fasta outfile> <name for map outfile>" )
- puts()
- exit( -1 )
- end
-
- begin
- cla = CommandLineArguments.new( ARGV )
- rescue ArgumentError => e
- Util.fatal_error( PRG_NAME, "error: " + e.to_s )
- end
- allowed_opts = Array.new
- disallowed = cla.validate_allowed_options_as_str( allowed_opts )
- if ( disallowed.length > 0 )
- Util.fatal_error( PRG_NAME, "unknown option(s): " + disallowed )
- end
-
- sp_taxonomy_infile = cla.get_file_name( 0 )
- sequences_infile = cla.get_file_name( 1 )
- sequences_outfile = cla.get_file_name( 2 )
- mapping_outfile = cla.get_file_name( 3 )
-
- Util.fatal_error_if_not_readable( PRG_NAME, sp_taxonomy_infile )
- Util.fatal_error_if_not_readable( PRG_NAME, sequences_infile )
- Util.fatal_error_if_not_writable( PRG_NAME, mapping_outfile )
- Util.fatal_error_if_not_writable( PRG_NAME, sequences_outfile )
-
- sp_taxonomies = SpTaxonomyParser.parse( sp_taxonomy_infile )
-
- Util.print_message( PRG_NAME, "read in taxonomic data for " + sp_taxonomies.size.to_s + " species from: " + sp_taxonomy_infile )
-
- fasta_parser = FastaParser.new
- msa_fac = MsaFactory.new
-
- seqs = msa_fac.create_msa_from_file( sequences_infile, fasta_parser )
-
- Util.print_message( PRG_NAME, "read in " + seqs.get_number_of_seqs.to_s + " sequences from: " + sequences_infile )
-
- removed = seqs.remove_redundant_sequences!( true, true )
-
- if removed.size > 0
- Util.print_message( PRG_NAME, "going to ignore the following " + removed.size.to_s + " redundant sequences:" )
- removed.each { | seq_name |
- puts seq_name
- }
- Util.print_message( PRG_NAME, "will process " + seqs.get_number_of_seqs.to_s + " non-redundant sequences" )
- end
-
- mapping_out = File.open( mapping_outfile, "a" )
-
- for i in 0 ... seqs.get_number_of_seqs
- seq = seqs.get_sequence( i )
- seq.set_name( Util::normalize_seq_name( modify_name( seq, i, sp_taxonomies, mapping_out ), 10 ) )
- end
-
- io = MsaIO.new()
-
- w = FastaWriter.new()
-
- w.set_max_name_length( 10 )
- w.clean( true )
- begin
- io.write_to_file( seqs, sequences_outfile, w )
- rescue Exception => e
- Util.fatal_error( PRG_NAME, "failed to write file: " + e.to_s )
- end
- mapping_out.close()
-
- Util.print_message( PRG_NAME, "wrote: " + mapping_outfile )
- Util.print_message( PRG_NAME, "wrote: " + sequences_outfile )
- Util.print_message( PRG_NAME, "OK" )
+ def modify_name( seq, i, sp_taxonomies, mapping_outfile )
- end
+ #i = i + 1792
- private
-
- def modify_name( seq, i, sp_taxonomies, mapping_outfile )
-
- #i = i + 1792
-
- seq_desc = seq.get_name
-
- taxonomy_sn = nil
-
- if seq_desc =~ /\[(.+)\]/
- taxonomy_sn = $1
- else
- Util.fatal_error( PRG_NAME, "no taxonomy in [" + seq_desc + "]" )
- end
-
- matching_sp_taxonomy = nil
-
- sp_taxonomies.each { |sp_taxonomy|
- if ( sp_taxonomy.scientific_name == taxonomy_sn )
- matching_sp_taxonomy = sp_taxonomy
- end
- }
-
- if matching_sp_taxonomy == nil
- Util.fatal_error( PRG_NAME, "taxonomy [" + taxonomy_sn + "] for [" + seq_desc + "] not found" )
- end
-
- new_name = i.to_s( 16 ) + "_" + matching_sp_taxonomy.code
-
- gi = nil
- if seq_desc =~ /gi\|(.+?)\|/
- gi = $1
- else
- Util.fatal_error( PRG_NAME, "no gi in [" + seq_desc + "]" )
- end
-
- seq_name = ""
-
- if seq_desc =~ /\|\s*([^|]+?)\s*\[/
- seq_name = $1
- end
-
- if seq_name =~ /\[.+\]$/
- # Redundant taxonomy information hides here.
- seq_name = seq_name.sub(/\[.+\]$/, '')
- end
- if seq_name =~ /^\s*hypothetical\s+protein\s*/i
- # Pointless information.
- seq_name = seq_name.sub( /^\s*hypothetical\s+protein\s*/i, '' )
- end
- if seq_name =~ /^\s*conserved\s+hypothetical\s+protein\s*/i
- # Pointless information.
- seq_name = seq_name.sub( /^\s*conserved\s+hypothetical\s+protein\s*/i, '' )
- end
-
- if gi != nil
- mapping_outfile.print( new_name + "\t" +
- TseqTaxonomyProcessor::TAXONOMY_CODE + matching_sp_taxonomy.code + "\t" +
- TseqTaxonomyProcessor::TAXONOMY_ID + matching_sp_taxonomy.id + "\t" +
- TseqTaxonomyProcessor::TAXONOMY_ID_TYPE + "ncbi" + "\t" +
- TseqTaxonomyProcessor::TAXONOMY_SN + matching_sp_taxonomy.scientific_name + "\t" +
- TseqTaxonomyProcessor::SEQ_ACCESSION + gi.to_s + "\t" +
- TseqTaxonomyProcessor::SEQ_ACCESSION_SOURCE + "gi" + "\t" +
- TseqTaxonomyProcessor::SEQ_NAME + seq_name + "\t" +
- TseqTaxonomyProcessor::SEQ_MOL_SEQ + seq.get_sequence_as_string +
- Constants::LINE_DELIMITER )
- else
- mapping_outfile.print( new_name + "\t" +
- TseqTaxonomyProcessor::TAXONOMY_CODE + matching_sp_taxonomy.code + "\t" +
- TseqTaxonomyProcessor::TAXONOMY_ID + matching_sp_taxonomy.id + "\t" +
- TseqTaxonomyProcessor::TAXONOMY_ID_TYPE + "ncbi" + "\t" +
- TseqTaxonomyProcessor::TAXONOMY_SN + matching_sp_taxonomy.scientific_name + "\t" +
- TseqTaxonomyProcessor::SEQ_NAME + seq_name + "\t" +
- TseqTaxonomyProcessor::SEQ_MOL_SEQ + seq.get_sequence_as_string +
- Constants::LINE_DELIMITER )
-
- end
- new_name
- end
+ seq_desc = seq.get_name
+
+ taxonomy_sn = nil
+
+ if seq_desc =~ /\[(.+)\]/
+ taxonomy_sn = $1
+ else
+ Util.fatal_error( PRG_NAME, "no taxonomy in [" + seq_desc + "]" )
+ end
+ matching_sp_taxonomy = nil
+
+ sp_taxonomies.each { |sp_taxonomy|
+ if ( sp_taxonomy.scientific_name == taxonomy_sn )
+ matching_sp_taxonomy = sp_taxonomy
+ end
+ }
+
+ if matching_sp_taxonomy == nil
+ Util.fatal_error( PRG_NAME, "taxonomy [" + taxonomy_sn + "] for [" + seq_desc + "] not found" )
+ end
+
+ new_name = i.to_s( 16 ) + "_" + matching_sp_taxonomy.code
+
+ gi = nil
+ if seq_desc =~ /gi\|(.+?)\|/
+ gi = $1
+ else
+ Util.fatal_error( PRG_NAME, "no gi in [" + seq_desc + "]" )
+ end
+
+ seq_name = ""
+
+ if seq_desc =~ /\|\s*([^|]+?)\s*\[/
+ seq_name = $1
+ end
+
+ if seq_name =~ /\[.+\]$/
+ # Redundant taxonomy information hides here.
+ seq_name = seq_name.sub(/\[.+\]$/, '')
+ end
+ if seq_name =~ /^\s*hypothetical\s+protein\s*/i
+ # Pointless information.
+ seq_name = seq_name.sub( /^\s*hypothetical\s+protein\s*/i, '' )
+ end
+ if seq_name =~ /^\s*conserved\s+hypothetical\s+protein\s*/i
+ # Pointless information.
+ seq_name = seq_name.sub( /^\s*conserved\s+hypothetical\s+protein\s*/i, '' )
+ end
+
+ if gi != nil
+ mapping_outfile.print( new_name + "\t" +
+ TseqTaxonomyProcessor::TAXONOMY_CODE + matching_sp_taxonomy.code + "\t" +
+ TseqTaxonomyProcessor::TAXONOMY_ID + matching_sp_taxonomy.id + "\t" +
+ TseqTaxonomyProcessor::TAXONOMY_ID_TYPE + "ncbi" + "\t" +
+ TseqTaxonomyProcessor::TAXONOMY_SN + matching_sp_taxonomy.scientific_name + "\t" +
+ TseqTaxonomyProcessor::SEQ_ACCESSION + gi.to_s + "\t" +
+ TseqTaxonomyProcessor::SEQ_ACCESSION_SOURCE + "gi" + "\t" +
+ TseqTaxonomyProcessor::SEQ_NAME + seq_name + "\t" +
+ TseqTaxonomyProcessor::SEQ_MOL_SEQ + seq.get_sequence_as_string +
+ Constants::LINE_DELIMITER )
+ else
+ mapping_outfile.print( new_name + "\t" +
+ TseqTaxonomyProcessor::TAXONOMY_CODE + matching_sp_taxonomy.code + "\t" +
+ TseqTaxonomyProcessor::TAXONOMY_ID + matching_sp_taxonomy.id + "\t" +
+ TseqTaxonomyProcessor::TAXONOMY_ID_TYPE + "ncbi" + "\t" +
+ TseqTaxonomyProcessor::TAXONOMY_SN + matching_sp_taxonomy.scientific_name + "\t" +
+ TseqTaxonomyProcessor::SEQ_NAME + seq_name + "\t" +
+ TseqTaxonomyProcessor::SEQ_MOL_SEQ + seq.get_sequence_as_string +
+ Constants::LINE_DELIMITER )
+
+ end
+ new_name
end
+ end
+
end # module Evoruby
#
# = lib/evo/tool/hmmscan_summary.rb - HmmscanSummary class
#
-# Copyright:: Copyright (C) 2012 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
-#
-# $Id: hmmscan_parser.rb,v 1.5 2010/12/13 19:00:11 cmzmasek Exp $
-#
+# Copyright:: Copyright (C) 2017 Christian M Zmasek
+# License:: GNU Lesser General Public License (LGPL)
require 'set'
-
require 'lib/evo/util/constants'
require 'lib/evo/util/util'
require 'lib/evo/util/command_line_arguments'
require 'lib/evo/io/parser/hmmscan_parser'
module Evoruby
-
class HmmscanSummary
PRG_NAME = "hsp"
- PRG_VERSION = "2.002"
- PRG_DESC = "hmmscan summary"
- PRG_DATE = "130319"
- COPYRIGHT = "2013 Christian M Zmasek"
- CONTACT = "phyloxml@gmail.com"
+ PRG_VERSION = "2.003"
+ PRG_DESC = "Summarize hmmscan output tables into simpler tables"
+ PRG_DATE = "170213"
WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
DELIMITER_OPTION = "d"
HELP_OPTION_1 = "help"
HELP_OPTION_2 = "h"
- USE_AVOID_HMMS = true
- AVOID_HHMS = [ "RRM_1", "RRM_2", "RRM_3", "RRM_4", "RRM_5", "RRM_6" ]
- LIMIT_FOR_CLOSE_DOMAINS = 20
+ USE_AVOID_HMMS = false
+ AVOID_HHMS = [ "x", "y", "z" ]
+ LIMIT_FOR_CLOSE_DOMAINS = 20 # Used for protein architecture summary
def initialize
@domain_counts = Hash.new
def run
Util.print_program_information( PRG_NAME,
- PRG_VERSION,
- PRG_DESC,
- PRG_DATE,
- COPYRIGHT,
- CONTACT,
- WWW,
- STDOUT )
+ PRG_VERSION,
+ PRG_DESC,
+ PRG_DATE,
+ WWW,
+ STDOUT )
+
+ if ( ARGV == nil || ( ARGV.length < 1 ) )
+ print_help
+ exit( -1 )
+ end
begin
cla = CommandLineArguments.new( ARGV )
end
if ( cla.is_option_set?( HELP_OPTION_1 ) ||
- cla.is_option_set?( HELP_OPTION_2 ) )
+ cla.is_option_set?( HELP_OPTION_2 ) )
print_help
exit( 0 )
end
- if ( cla.get_number_of_files != 2 )
- print_help
- exit( -1 )
- end
-
allowed_opts = Array.new
allowed_opts.push( DELIMITER_OPTION )
allowed_opts.push( I_E_VALUE_THRESHOLD_OPTION )
disallowed = cla.validate_allowed_options_as_str( allowed_opts )
if ( disallowed.length > 0 )
Util.fatal_error( PRG_NAME,
- "unknown option(s): " + disallowed,
- STDOUT )
+ "unknown option(s): " + disallowed,
+ STDOUT )
end
inpath = cla.get_file_name( 0 )
- outpath = cla.get_file_name( 1 )
+
+ outpath = ""
+ if ( cla.get_number_of_files == 1 )
+ outpath = inpath + Constants::DOMAIN_TABLE_SUFFIX
+ elsif ( cla.get_number_of_files == 2 )
+ outpath = cla.get_file_name( 1 )
+ else
+ print_help
+ exit( -1 )
+ end
column_delimiter = "\t"
if ( cla.is_option_set?( DELIMITER_OPTION ) )
puts()
puts( "hmmpfam outputfile : " + inpath )
puts( "outputfile : " + outpath )
- puts( "species : " + species )
+
if ( i_e_value_threshold >= 0.0 )
puts( "i-E-value threshold : " + i_e_value_threshold.to_s )
else
if ( column_delimiter == "\t" )
puts( "column delimiter : TAB" )
else
- puts( "column delimiter : " + column_delimiter )
- end
- if fs_e_value_threshold >= 0.0
- puts( "E-value threshold : " + fs_e_value_threshold.to_s )
- else
- puts( "E-value threshold : no threshold" )
+ puts( "column delimiter : " + column_delimiter )
end
if !hmm_for_protein_output.empty?
puts( "HMM for proteins : " + hmm_for_protein_output )
+ puts( "species : " + species )
+ if fs_e_value_threshold >= 0.0
+ puts( "E-value threshold : " + fs_e_value_threshold.to_s )
+ else
+ puts( "E-value threshold : no threshold" )
+ end
end
puts()
begin
parse( inpath,
- outpath,
- column_delimiter,
- i_e_value_threshold,
- ignore_dufs,
- parse_descriptions,
- fs_e_value_threshold,
- hmm_for_protein_output,
- species )
+ outpath,
+ column_delimiter,
+ i_e_value_threshold,
+ ignore_dufs,
+ parse_descriptions,
+ fs_e_value_threshold,
+ hmm_for_protein_output,
+ species )
rescue IOError => e
Util.fatal_error( PRG_NAME, "error: " + e.to_s, STDOUT )
end
puts
puts( Util.draw_histogram( domain_counts, "#" ) )
puts
+ Util.print_message( PRG_NAME, "wrote: " + outpath )
+ Util.print_message( PRG_NAME, "next step in standard analysis pipeline: d2f.rb")
Util.print_message( PRG_NAME, 'OK' )
puts
# raises ArgumentError, IOError
def parse( inpath,
- outpath,
- column_delimiter,
- i_e_value_threshold,
- ignore_dufs,
- get_descriptions,
- fs_e_value_threshold,
- hmm_for_protein_output,
- species )
+ outpath,
+ column_delimiter,
+ i_e_value_threshold,
+ ignore_dufs,
+ get_descriptions,
+ fs_e_value_threshold,
+ hmm_for_protein_output,
+ species )
Util.check_file_for_readability( inpath )
Util.check_file_for_writability( outpath )
results.each do | r |
model = r.model
+ desc = r.desc
query = r.query
i_e_value = r.i_e_value
env_from = r.env_from
env_to = r.env_to
if ( ( i_e_value_threshold < 0.0 ) || ( i_e_value <= i_e_value_threshold ) ) &&
- ( !ignore_dufs || ( model !~ /^DUF\d+/ ) )
+ ( !ignore_dufs || ( model !~ /^DUF\d+/ ) )
count_model( model )
outfile.print( query +
- column_delimiter )
+ column_delimiter )
if ( get_descriptions )
outfile.print( desc +
- column_delimiter )
+ column_delimiter )
end
outfile.print( model +
- column_delimiter +
- env_from.to_s +
- column_delimiter +
- env_to.to_s +
- column_delimiter +
- i_e_value.to_s )
+ column_delimiter +
+ env_from.to_s +
+ column_delimiter +
+ env_to.to_s +
+ column_delimiter +
+ i_e_value.to_s )
outfile.print( Constants::LINE_DELIMITER )
end
if !prev_query.empty? && prev_query != query
if !hmmscan_results_per_protein.empty?
process_hmmscan_results_per_protein( hmmscan_results_per_protein,
- fs_e_value_threshold,
- hmm_for_protein_output,
- i_e_value_threshold,
- species )
+ fs_e_value_threshold,
+ hmm_for_protein_output,
+ i_e_value_threshold,
+ species )
end
hmmscan_results_per_protein.clear
end
if !hmm_for_protein_output.empty? && !hmmscan_results_per_protein.empty?
process_hmmscan_results_per_protein( hmmscan_results_per_protein,
- fs_e_value_threshold,
- hmm_for_protein_output,
- i_e_value_threshold,
- species )
+ fs_e_value_threshold,
+ hmm_for_protein_output,
+ i_e_value_threshold,
+ species )
end
outfile.flush()
end
def process_hmmscan_results_per_protein( hmmscan_results_per_protein,
- fs_e_value_threshold,
- hmm_for_protein_output,
- i_e_value_threshold,
- species )
+ fs_e_value_threshold,
+ hmm_for_protein_output,
+ i_e_value_threshold,
+ species )
dc = 0
# filter according to i-Evalue threshold
hmmscan_results_per_protein.each do | r |
-
if r.model == hmm_for_protein_output
if fs_e_value_threshold > 0.0 && r.fs_e_value > fs_e_value_threshold
return
s
end
-
def print_help()
puts( "Usage:" )
puts()
- puts( " " + PRG_NAME + ".rb [options] <hmmscan outputfile> <outputfile>" )
+ puts( " " + PRG_NAME + ".rb [options] <hmmscan outputfile> [outputfile]" )
+ puts()
+ puts( " options: -" + DELIMITER_OPTION + "=<s> : column delimiter for outputfile, default is TAB" )
+ puts( " -" + I_E_VALUE_THRESHOLD_OPTION + "=<f>: i-E-value threshold, default is no threshold" )
+ puts( " -" + PARSE_OUT_DESCRIPITION_OPTION + " : parse query description (in addition to query name)" )
+ puts( " -" + IGNORE_DUF_OPTION + " : ignore DUFs" )
+ puts( " -" + HMM_FOR_PROTEIN_OUTPUT + "=<s> : HMM for protein architectures summary" )
+ puts( " -" + FS_E_VALUE_THRESHOLD_OPTION + "=<f>: E-value threshold for full protein sequences, only for protein architectures summary" )
+ puts( " -" + SPECIES_OPTION + "=<s> : species for protein architectures summary" )
+ puts()
+ puts( "Example:" )
+ puts()
+ puts( " " + "hmmscan --nobias --domtblout P53_hmmscan_#{Constants::PFAM_V_FOR_EX}_10 -E 10 Pfam-A.hmm P53_ni.fasta" )
puts()
- puts( " options: -" + DELIMITER_OPTION + ": column delimiter for outputfile, default is TAB" )
- puts( " -" + I_E_VALUE_THRESHOLD_OPTION + ": i-E-value threshold, default is no threshold" )
- puts( " -" + PARSE_OUT_DESCRIPITION_OPTION + ": parse query description (in addition to query name)" )
- puts( " -" + IGNORE_DUF_OPTION + ": ignore DUFs" )
- puts( " -" + FS_E_VALUE_THRESHOLD_OPTION + ": E-value threshold for full protein sequences, only for protein summary" )
- puts( " -" + HMM_FOR_PROTEIN_OUTPUT + ": HMM for protein summary" )
- puts( " -" + SPECIES_OPTION + ": species for protein summary" )
+ puts( " " + PRG_NAME + ".rb P53_hmmscan_300_10" )
puts()
end
#
# = lib/evo/apps/phylogenies_decorator
#
-# Copyright:: Copyright (C) 2006-2008 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
#
-# decoration of phylogenies with sequence/species names and domain architectures
+# Last modified: 2017/02/09
#
-# $Id: phylogenies_decorator.rb,v 1.34 2010/12/13 19:00:11 cmzmasek Exp $
+# decoration of phylogenies with sequence/species names and domain architectures
#
# Environment variable FORESTER_HOME needs to point to the appropriate
# directory (e.g. setenv FORESTER_HOME $HOME/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/)
#DECORATOR_OPTIONS_SEQ_NAMES = '-r=1 -mdn'
#DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -sn'
- DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -tc -mp -or'
+ #DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -tc -mp -or'
+ DECORATOR_OPTIONS_SEQ_NAMES = '-p -t -mp -or'
# -mdn is a hidden expert option to rename e.g. "6_ORYLA3" to "6_[3]_ORYLA"
#DECORATOR_OPTIONS_SEQ_NAMES = '-sn -r=1'
#DECORATOR_OPTIONS_DOMAINS = '-r=1'
DOMAINS_MAPFILE_SUFFIX = '_hmmscan_10.dff'
SLEEP_TIME = 0.05
REMOVE_NI = true
- IDS_ONLY = true
+ IDS_ONLY = true #TODO this should be a command line option
+ FIXED_NIM_FILE = 'all.nim' #TODO this should be a command line option
TMP_FILE_1 = '___PD1___'
TMP_FILE_2 = '___PD2___'
LOG_FILE = '00_phylogenies_decorator.log'
JAVA_HOME = ENV[Constants::JAVA_HOME_ENV_VARIABLE]
PRG_NAME = "phylogenies_decorator"
- PRG_DATE = "2013.11.15"
+ PRG_DATE = "170209"
PRG_DESC = "decoration of phylogenies with sequence/species names and domain architectures"
PRG_VERSION = "1.02"
- COPYRIGHT = "2013 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
+ COPYRIGHT = "2017 Christian M Zmasek"
+ CONTACT = "phyloxml at gmail dot com"
WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
HELP_OPTION_1 = "help"
outfile = outfile.sub( /_ni_/, '_' )
end
- if File.exists?( outfile )
+ if File.exist?( outfile )
msg = counter.to_s + ': ' + phylogeny_file + ' -> ' + outfile +
' : already exists, skipping'
Util.print_message( PRG_NAME, msg )
domains_mapfile_name = nil
seqs_file_name = nil
- ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
-
+ if ( FIXED_NIM_FILE == nil )
+ ids_mapfile_name = get_file( files, phylogeny_id, IDS_MAPFILE_SUFFIX )
+ else
+ ids_mapfile_name = FIXED_NIM_FILE
+ end
+
unless IDS_ONLY
domains_mapfile_name = get_file( files, phylogeny_id, DOMAINS_MAPFILE_SUFFIX )
seqs_file_name = get_seq_file( files, phylogeny_id )
cmd = decorator + ' ' + DECORATOR_OPTIONS_DOMAINS + ' ' +
'-f=d ' + TMP_FILE_1 + ' ' +
- domains_mapfile_name + ' ' +TMP_FILE_2
+ domains_mapfile_name + ' ' + TMP_FILE_2
puts cmd
begin
execute_cmd( cmd, log )
end
def get_file( files_in_dir, phylogeny_id, suffix_pattern )
- matching_files = Array.new
-
- files_in_dir.each { | file |
-
- if ( !File.directory?( file ) &&
- file !~ /^\./ &&
- file !~ /^00/ &&
- file =~ /^#{phylogeny_id}.*#{suffix_pattern}$/ )
- matching_files << file
- end
- }
+ matching_files = Util.get_matching_files( files_in_dir, phylogeny_id, suffix_pattern )
if matching_files.length < 1
Util.fatal_error( PRG_NAME, 'no file matching [' + phylogeny_id +
'...' + suffix_pattern + '] present in current directory' )
#
# = lib/evo/apps/phylogeny_factory - PhylogenyFactory class
#
-# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
#
-# $Id: phylogeny_factory.rb,v 1.32 2010/12/13 19:00:11 cmzmasek Exp $
+# Last modified: 2017/02/07
require 'lib/evo/util/constants'
require 'lib/evo/util/util'
PRG_DATE = "1301111"
PRG_DESC = "automated phylogeny reconstruction using queing system"
PRG_VERSION = "1.100"
- COPYRIGHT = "2013 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
- WWW = "www.phylosoft.org"
+ COPYRIGHT = "2017 Christian M Zmasek"
+ CONTACT = "cmzmasek at yahoo dot com"
+ WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
USE_JOB_SUBMISSION_SYSTEM_OPTION = 's'
BS_OPTION = 'b'
#
# = lib/evo/apps/taxonomy_processor - TaxonomyProcessor class
#
-# Copyright:: Copyright (C) 20017 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
-#
-
-
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
+require 'lib/evo/util/constants'
require 'lib/evo/util/util'
require 'lib/evo/msa/msa_factory'
require 'lib/evo/msa/msa'
require 'lib/evo/util/command_line_arguments'
module Evoruby
-
class TaxonomyProcessor
PRG_NAME = "tap"
- PRG_DATE = "170206"
- PRG_DESC = "replacement of species names in multiple sequence files"
- PRG_VERSION = "2.002"
- COPYRIGHT = "2017 Christian M Zmasek"
- CONTACT = "phylosoft@gmail.com"
- WWW = ""
+ PRG_DATE = "170213"
+ PRG_DESC = "Replacement of labels in multiple sequence files"
+ PRG_VERSION = "2.004"
+ WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
EXTRACT_TAXONOMY_OPTION = "t"
-
+ ANNOTATION_OPTION = "a"
+ HELP_OPTION_1 = "help"
+ HELP_OPTION_2 = "h"
def run()
Util.print_program_information( PRG_NAME,
- PRG_VERSION,
- PRG_DESC,
- PRG_DATE,
- COPYRIGHT,
- CONTACT,
- WWW,
- STDOUT )
-
- if ( ARGV == nil || ( ARGV.length != 1 && ARGV.length != 2 && ARGV.length != 3 && ARGV.length != 4 && ARGV.length != 5 && ARGV.length != 6 ) )
- puts( "Usage: #{PRG_NAME}.rb [options] <input sequences> [output sequences] [output id list]" )
- puts()
- puts( " options: -" + EXTRACT_TAXONOMY_OPTION + ": to extract taxonomy information from bracketed expression" )
- puts()
+ PRG_VERSION,
+ PRG_DESC,
+ PRG_DATE,
+ WWW,
+ STDOUT )
+
+ if ( ARGV == nil || ( ARGV.length < 1 ) )
+ print_help()
exit( -1 )
end
Util.fatal_error( PRG_NAME, "error: " + e.to_s )
end
- input = nil
- output = nil
- list_file = nil
+ if ( cla.is_option_set?( HELP_OPTION_1 ) ||
+ cla.is_option_set?( HELP_OPTION_2 ) )
+ print_help
+ exit( 0 )
+ end
+
+ input = nil
+ output = nil
+ list_file = nil
if cla.get_number_of_files == 3
input = cla.get_file_name( 0 )
else
i = input
end
- output = i + "_ni.fasta"
- list_file = i + ".nim"
+ output = i + Constants::ID_NORMALIZED_FASTA_FILE_SUFFIX
+ list_file = i + Constants::ID_MAP_FILE_SUFFIX
+ else
+ print_help()
+ exit(-1)
end
-
allowed_opts = Array.new
allowed_opts.push( EXTRACT_TAXONOMY_OPTION )
+ allowed_opts.push( ANNOTATION_OPTION )
disallowed = cla.validate_allowed_options_as_str( allowed_opts )
if ( disallowed.length > 0 )
extract_taxonomy = true
end
- if ( File.exists?( output ) )
+ annotation = nil
+ if ( cla.is_option_set?( ANNOTATION_OPTION ) )
+ annotation = cla.get_option_value( ANNOTATION_OPTION )
+ end
+
+ if ( File.exist?( output ) )
Util.fatal_error( PRG_NAME, "outfile [" + output + "] already exists" )
end
- if ( File.exists?( list_file ) )
+ if ( File.exist?( list_file ) )
Util.fatal_error( PRG_NAME, "list file [" + list_file + "] already exists" )
end
- if ( !File.exists?( input) )
+ if ( !File.exist?( input) )
Util.fatal_error( PRG_NAME, "infile [" + input + "] does not exist" )
end
if ( extract_taxonomy )
puts( "Extract taxonomy: true" )
end
+ if ( annotation != nil )
+ puts( "Annotation : " + annotation )
+ end
puts()
f = MsaFactory.new()
lf = File.open( list_file, "a" )
for i in 0 ... msa.get_number_of_seqs
seq = msa.get_sequence( i )
- seq.set_name( modify_name( seq.get_name(), i, lf, extract_taxonomy ) )
+ seq.set_name( modify_name( seq.get_name(), i, lf, extract_taxonomy, annotation ) )
end
io = MsaIO.new()
w = nil
else
w = PhylipSequentialWriter.new()
end
- w.set_max_name_length( 10 )
+ w.set_max_name_length( 9 )
w.clean( true )
begin
io.write_to_file( msa, output, w )
lf.close()
Util.print_message( PRG_NAME, "wrote: " + list_file )
Util.print_message( PRG_NAME, "wrote: " + output )
+ Util.print_message( PRG_NAME, "next steps in standard analysis pipeline: hmmscan followed by hsp.rb")
Util.print_message( PRG_NAME, "OK" )
end
private
- def modify_name( desc, counter, file, extract_taxonomy )
+ def modify_name( desc, counter, file, extract_taxonomy, annotation )
new_desc = nil
desc.gsub!( /\s+/, ' ' )
- #if desc =~ /^>?\s*\S{1,10}_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|CAP)/
- # new_desc = counter.to_s( 16 ) + "_" + $1
if extract_taxonomy
if desc =~/\s\[(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|CAP)\]/
new_desc = counter.to_s( 16 ) + "_" + $1
else
new_desc = counter.to_s( 16 )
end
- file.print( new_desc + "\t" + desc + "\n" )
+ if (annotation != nil)
+ new_desc = new_desc + annotation
+ file.print( new_desc + "\t" + desc + " " + annotation + "\n" )
+ else
+ file.print( new_desc + "\t" + desc + "\n" )
+ end
+ if ( new_desc.length > 9)
+ Util.fatal_error( PRG_NAME, "shortened identifier [" +
+ new_desc + "] is too long (" + new_desc.length.to_s + " characters)" )
+ end
new_desc
end
+ def print_help()
+ puts( "Usage:" )
+ puts()
+ puts( " " + PRG_NAME + ".rb [options] <input sequences> [output sequences] [output id list]" )
+ puts()
+ puts( " options: -" + EXTRACT_TAXONOMY_OPTION + " : to extract taxonomy information from bracketed expressions" )
+ puts( " -" + ANNOTATION_OPTION + "=<s>: to add an annotation to all entries" )
+ puts()
+ puts( "Example:" )
+ puts()
+ puts( " " + PRG_NAME + ".rb P53.fasta" )
+ puts()
+ end
+
end # class TaxonomyProcessor
end # module Evoruby
#
# = lib/evo/util/command_line_arguments.rb - CommandLineArguments class
#
-# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
#
-# $Id: command_line_arguments.rb,v 1.2 2007/06/12 04:51:34 cmzmasek Exp $
-#
-# last modified: 05/16/2007
+# Last modified: 2017/02/12
module Evoruby
-
- class CommandLineArguments
-
- OPTIONS_PREFIX = "-"
- EXTENDED_OPTIONS_PREFIX = "--"
- OPTIONS_SEPARATOR = "="
-
- # raises ArgumentError
- def initialize( args )
- @options = Hash.new
- @extended_options = Hash.new
- @file_names = Array.new
- parse_arguments( args )
- end
-
- def get_file_names
- return @file_names
- end
-
- def get_file_name( i )
- return @file_names[ i ]
- end
-
- def get_number_of_files()
- return @file_names.length
- end
-
- def is_option_set?( option_name )
- o = get_all_options
- return ( o.has_key?( option_name ) )
- end
-
- # raises ArgumentError
- def get_option_value( option_name )
- o = get_all_options
- if ( o.has_key?( option_name ) )
- value = o[ option_name ]
- if ( !Util.is_string_empty?( value ) )
- return value
- else
- raise( ArgumentError, "value for option \"" +
- option_name + "\" is not set", caller )
- end
- else
- raise( ArgumentError, "option \"" + option_name +
- "\" is not set", caller )
- end
- end
-
- def get_option_value_as_int( option_name )
- return get_option_value( option_name ).to_i
- end
-
- def get_option_value_as_float( option_name )
- return get_option_value( option_name ).to_f
- end
-
- # mandatory_options (Array)
- #
- def validate_mandatory_options( mandatory_options )
- o = get_all_options
- missing = Array.new
- for ma in mandatory_options
- if ( !o.has_key?( ma ) )
- missing.push( ma )
- end
- end
- return missing
- end
-
- # mandatory_options (Array)
- #
- def validate_mandatory_options_as_str( mandatory_options )
- missing = validate_mandatory_options( mandatory_options )
- return missing.join( ", " )
- end
-
- # allowed_options (Array)
- #
- def validate_allowed_options( allowed_options )
- o = get_all_options
- disallowed = Array.new
- o.each_key { |op|
- if ( !allowed_options.include?( op ) )
- disallowed.push( op )
- end
- }
- return disallowed
- end
-
- # allowed_options (Array)
- #
- def validate_allowed_options_as_str( allowed_options )
- disallowed = validate_allowed_options( allowed_options )
- return disallowed.join( ", " )
- end
-
- private
-
- def get_all_options
- o = Hash.new
- o.merge!( get_options_list )
- o.merge!( get_extended_options_list )
- return o
- end
-
- def parse_arguments( args )
- for arg in args
- if ( arg.index( EXTENDED_OPTIONS_PREFIX ) == 0 )
- parse_option( arg.slice( EXTENDED_OPTIONS_PREFIX.length, arg.length() - 1 ),
- get_extended_options_list )
-
- elsif ( arg.index( OPTIONS_PREFIX ) == 0 )
- parse_option( arg.slice( OPTIONS_PREFIX.length, arg.length() - 1 ),
- get_options_list )
-
- else
- get_file_names.push( arg )
- end
- end
- end
-
- # raises ArgumentError
- def parse_option( option, options_map )
- sep_index = option.index( OPTIONS_SEPARATOR )
- if ( sep_index == nil )
- if ( Util.is_string_empty?( option ) )
- raise( ArgumentError, "attempt to set option with an empty name" )
- end
- if ( get_all_options.has_key?( option ) )
- raise( ArgumentError, "attempt to set option \"" +
- option + "\" mutiple times" )
- end
- options_map[ option ] = ""
- else
- key = option.slice( 0, sep_index )
- value = option.slice( sep_index + 1, option.length() - 1 )
- if ( Util.is_string_empty?( key ) )
- raise( ArgumentError, "attempt to set option with an empty name" )
- end
- if ( Util.is_string_empty?( value ) )
- raise( ArgumentError, "attempt to set option with an empty value" )
- end
- if ( get_all_options.has_key?( key ) )
- raise( ArgumentError, "attempt to set option \"" +
- key + "\" mutiple times [" + option + "]" )
- end
- options_map[ key ] = value
- end
- end
-
- def get_file_names_list
- return @file_names
- end
-
- def get_options_list
- return @options
- end
-
- def get_extended_options_list
- return @extended_options
- end
-
- end # class CommandLineArguments
+ class CommandLineArguments
+
+ OPTIONS_PREFIX = "-"
+ EXTENDED_OPTIONS_PREFIX = "--"
+ OPTIONS_SEPARATOR = "="
+ # raises ArgumentError
+ def initialize( args )
+ @options = Hash.new
+ @extended_options = Hash.new
+ @file_names = Array.new
+ parse_arguments( args )
+ end
+
+ def get_file_names
+ return @file_names
+ end
+
+ def get_file_name( i )
+ return @file_names[ i ]
+ end
+
+ def get_number_of_files()
+ return @file_names.length
+ end
+
+ def is_option_set?( option_name )
+ o = get_all_options
+ return ( o.has_key?( option_name ) )
+ end
+
+ # raises ArgumentError
+ def get_option_value( option_name )
+ o = get_all_options
+ if ( o.has_key?( option_name ) )
+ value = o[ option_name ]
+ if ( !Util.is_string_empty?( value ) )
+ return value
+ else
+ puts()
+ puts( "value for option \"" + option_name + "\" is not set")
+ puts()
+ exit( -1 )
+ end
+ else
+ raise( ArgumentError, "option \"" + option_name +
+ "\" is not set", caller )
+ end
+ end
+
+ def get_option_value_as_int( option_name )
+ return get_option_value( option_name ).to_i
+ end
+
+ def get_option_value_as_float( option_name )
+ return get_option_value( option_name ).to_f
+ end
+
+ # mandatory_options (Array)
+ #
+ def validate_mandatory_options( mandatory_options )
+ o = get_all_options
+ missing = Array.new
+ for ma in mandatory_options
+ if ( !o.has_key?( ma ) )
+ missing.push( ma )
+ end
+ end
+ return missing
+ end
+
+ # mandatory_options (Array)
+ #
+ def validate_mandatory_options_as_str( mandatory_options )
+ missing = validate_mandatory_options( mandatory_options )
+ return missing.join( ", " )
+ end
+
+ # allowed_options (Array)
+ #
+ def validate_allowed_options( allowed_options )
+ o = get_all_options
+ disallowed = Array.new
+ o.each_key { |op|
+ if ( !allowed_options.include?( op ) )
+ disallowed.push( op )
+ end
+ }
+ return disallowed
+ end
+
+ # allowed_options (Array)
+ #
+ def validate_allowed_options_as_str( allowed_options )
+ disallowed = validate_allowed_options( allowed_options )
+ return disallowed.join( ", " )
+ end
+
+ private
+
+ def get_all_options
+ o = Hash.new
+ o.merge!( get_options_list )
+ o.merge!( get_extended_options_list )
+ return o
+ end
+
+ def parse_arguments( args )
+ for arg in args
+ if ( arg.index( EXTENDED_OPTIONS_PREFIX ) == 0 )
+ parse_option( arg.slice( EXTENDED_OPTIONS_PREFIX.length, arg.length() - 1 ),
+ get_extended_options_list )
+
+ elsif ( arg.index( OPTIONS_PREFIX ) == 0 )
+ parse_option( arg.slice( OPTIONS_PREFIX.length, arg.length() - 1 ),
+ get_options_list )
+
+ else
+ get_file_names.push( arg )
+ end
+ end
+ end
+
+ # raises ArgumentError
+ def parse_option( option, options_map )
+ sep_index = option.index( OPTIONS_SEPARATOR )
+ if ( sep_index == nil )
+ if ( Util.is_string_empty?( option ) )
+ raise( ArgumentError, "attempt to set option with an empty name" )
+ end
+ if ( get_all_options.has_key?( option ) )
+ raise( ArgumentError, "attempt to set option \"" +
+ option + "\" mutiple times" )
+ end
+ options_map[ option ] = ""
+ else
+ key = option.slice( 0, sep_index )
+ value = option.slice( sep_index + 1, option.length() - 1 )
+ if ( Util.is_string_empty?( key ) )
+ raise( ArgumentError, "attempt to set option with an empty name" )
+ end
+ if ( Util.is_string_empty?( value ) )
+ raise( ArgumentError, "attempt to set option with an empty value" )
+ end
+ if ( get_all_options.has_key?( key ) )
+ raise( ArgumentError, "attempt to set option \"" +
+ key + "\" mutiple times [" + option + "]" )
+ end
+ options_map[ key ] = value
+ end
+ end
+
+ def get_file_names_list
+ return @file_names
+ end
+
+ def get_options_list
+ return @options
+ end
+
+ def get_extended_options_list
+ return @extended_options
+ end
+
+ end # class CommandLineArguments
end # module Evoruby
#
# = lib/evo/util/constants.rb - Constants class
#
-# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
-#
-# $Id: constants.rb,v 1.3 2007/12/21 04:13:33 cmzmasek Exp $
-#
-# last modified: 05/11/2007
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
module Evoruby
+ class Constants
- class Constants
-
- VERBOSE = true
+ VERBOSE = true
- EVORUBY_VERSION = '1.0'
+ EVORUBY_VERSION = '1.1'
- FORESTER_HOME_ENV_VARIABLE = 'FORESTER_HOME'
- JAVA_HOME_ENV_VARIABLE = 'JAVA_HOME'
+ ID_NORMALIZED_FASTA_FILE_SUFFIX = "_ni.fasta"
+ ID_MAP_FILE_SUFFIX = ".nim"
+ DOMAIN_TABLE_SUFFIX = "_domain_table"
+ DOMAINS_TO_FORESTER_OUTFILE_SUFFIX = ".dff"
+ DOMAINS_TO_FORESTER_EVALUE_CUTOFF_SUFFIX = "_dtfE"
+
+ PFAM_V_FOR_EX = "300" # Pfam version for examples
- EVORUBY = 'evoruby'
+ FORESTER_HOME_ENV_VARIABLE = 'FORESTER_HOME'
+ JAVA_HOME_ENV_VARIABLE = 'JAVA_HOME'
- LINE_DELIMITER = "\n"
+ EVORUBY = 'evoruby'
- FILE_SEPARATOR = File::SEPARATOR
+ LINE_DELIMITER = "\n"
- DOMAIN_STRUCTURE_NHX_SEPARATOR = '>'
+ FILE_SEPARATOR = File::SEPARATOR
+ DOMAIN_STRUCTURE_NHX_SEPARATOR = '>'
- end # class Constants
+ end # class Constants
end # module Evoruby
#
# = lib/evo/util/util.rb - Util class
#
-# Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
-# License:: GNU Lesser General Public License (LGPL)
+# Copyright:: Copyright (C) 2017 Christian M. Zmasek
+# License:: GNU Lesser General Public License (LGPL)
#
-# $Id: util.rb,v 1.17 2009/10/06 22:22:46 cmzmasek Exp $
-#
-# last modified: 05/15/2007
+# Last modified: 2017/02/07
require 'lib/evo/util/constants'
module Evoruby
-
class Util
+ def Util.get_matching_files( files, prefix_pattern, suffix_pattern )
+ matching_files = Array.new
+ files.each { | file |
+ if ( !File.directory?( file ) &&
+ file !~ /^\./ &&
+ file =~ /^#{prefix_pattern}.*#{suffix_pattern}$/ )
+ matching_files << file
+ end
+ }
+ matching_files
+ end
def Util.normalize_seq_name( name, length, exception_if_too_long = false )
if name.length > length
end
name = name[ 0, length ]
elsif name.length < length
- for i in 0 ... length - name.length
+ t = length - name.length
+ t.times do
name = name + " "
end
end
value
end
-
# raises ArgumentError
def Util.file2array( path, split_by_semicolon )
Util.check_file_for_readability( path )
end
def Util.print_program_information( prg_name,
- prg_version,
- prg_desc,
- date,
- copyright,
- contact,
- www,
- io = STDOUT )
-
- # if RUBY_VERSION !~ /1.9/
- # puts( "Your ruby version is #{RUBY_VERSION}, expected 1.9.x " )
- # exit( -1 )
- # end
+ prg_version,
+ prg_desc,
+ date,
+ www,
+ io = STDOUT )
ruby_version = RUBY_VERSION
l = prg_name.length + prg_version.length + date.length + ruby_version.length + 12
io.print( prg_desc )
io.print( Constants::LINE_DELIMITER )
io.print( Constants::LINE_DELIMITER )
- io.print( "Copyright (C) " + copyright )
- io.print( Constants::LINE_DELIMITER )
- io.print( "Contact: " + contact )
- io.print( Constants::LINE_DELIMITER )
- io.print( " " + www )
+ io.print( "Website: " + www )
io.print( Constants::LINE_DELIMITER )
io.print( Constants::LINE_DELIMITER )
end