in progress
[jalview.git] / forester / ruby / evoruby / lib / evo / tool / new_tap.rb
1 #
2 # = lib/evo/apps/ -  class
3 #
4 # Copyright::  Copyright (C) 2009 Christian M. Zmasek
5 # License::    GNU Lesser General Public License (LGPL)
6 #
7 # $Id: new_tap.rb,v 1.4 2010/12/13 19:00:11 cmzmasek Exp $
8
9
10 require 'lib/evo/util/util'
11 require 'lib/evo/msa/msa_factory'
12 require 'lib/evo/msa/msa'
13 require 'lib/evo/io/msa_io'
14 require 'lib/evo/io/parser/fasta_parser'
15 require 'lib/evo/io/parser/general_msa_parser'
16 require 'lib/evo/io/writer/fasta_writer'
17 require 'lib/evo/io/writer/phylip_sequential_writer'
18 require 'lib/evo/util/command_line_arguments'
19
20 module Evoruby
21
22     class TaxonomyProcessor
23
24         PRG_NAME       = ""
25         PRG_DATE       = "2009.10.09"
26         PRG_DESC       = "replacement of labels in multiple sequence files"
27         PRG_VERSION    = "1.00"
28         COPYRIGHT      = "2009 Christian M Zmasek"
29         CONTACT        = "phylosoft@gmail.com"
30         WWW            = "www.phylosoft.org"
31
32         REMOVE_REDUNDANT_SEQS_OPTION = "rr"
33         
34         def initialize()
35             @taxonomies = Hash.new()
36         end
37
38         def run()
39
40             Util.print_program_information( PRG_NAME,
41                 PRG_VERSION,
42                 PRG_DESC,
43                 PRG_DATE,
44                 COPYRIGHT,
45                 CONTACT,
46                 WWW,
47                 STDOUT )
48
49             if ( ARGV == nil || ( ARGV.length != 3 && ARGV.length != 4 ) )
50                 puts( "Usage: #{PRG_NAME}.rb <input sequences> <output sequences> <output map>" )
51                 puts()
52                 puts( "  options: -" + REMOVE_REDUNDANT_SEQS_OPTION + ": to remove redundant sequences" )
53                 puts()
54                 exit( -1 )
55             end
56
57             begin
58                 cla = CommandLineArguments.new( ARGV )
59             rescue ArgumentError => e
60                 Util.fatal_error( PRG_NAME, "error: " + e.to_s )
61             end
62             
63             input     = cla.get_file_name( 0 )
64             output    = cla.get_file_name( 1 )
65             map_file = cla.get_file_name( 2 )
66
67             allowed_opts = Array.new
68             allowed_opts.push( REMOVE_REDUNDANT_SEQS_OPTION ) 
69             
70             disallowed = cla.validate_allowed_options_as_str( allowed_opts )
71             if ( disallowed.length > 0 )
72                 Util.fatal_error( PRG_NAME, "unknown option(s): " + disallowed )
73             end
74
75             
76             remove_redudant = false
77             if ( cla.is_option_set?( REMOVE_REDUNDANT_SEQS_OPTION ) )
78                 remove_redudant = true
79             end
80
81             if ( File.exists?( output ) )
82                 Util.fatal_error( PRG_NAME, "outfile [" + output + "] already exists" )
83             end
84             if ( File.exists?( map_file ) )
85                 Util.fatal_error( PRG_NAME, "map file [" + map_file + "] already exists" )
86             end
87             if ( !File.exists?( input) )
88                 Util.fatal_error( PRG_NAME, "infile [" + input + "] does not exist" )
89             end
90            
91             fasta_like = Util.looks_like_fasta?( input )
92
93             puts()
94             puts( "Input alignment : " + input )
95             puts( "Output alignment: " + output )
96             puts( "Output map      : " + map_file )
97             if ( fasta_like )
98                 puts( "Format          : Fasta"  )
99             else
100                 puts( "Format          : Phylip like" )
101             end
102             puts()
103
104             species_map = Hash.new
105            
106             f = MsaFactory.new()
107             begin
108                 if ( fasta_like )
109                     msa = f.create_msa_from_file( input, FastaParser.new() )
110                 else
111                     msa = f.create_msa_from_file( input, GeneralMsaParser.new() )
112                 end
113             rescue Exception => e
114                 Util.fatal_error( PRG_NAME, "failed to read file: " + e.to_s )
115             end
116
117             if ( msa == nil || msa.get_number_of_seqs() < 1 )
118                 Util.fatal_error( PRG_NAME, "failed to read MSA" )
119             end
120             begin
121                 Util.check_file_for_writability( map_file )
122             rescue Exception => e
123                 Util.fatal_error( PRG_NAME, "error: " + e.to_, STDOUT )
124             end
125
126             if ( remove_redudant ) 
127                 removed = msa.remove_redundant_sequences!( true )
128                 if removed.size > 0
129                     Util.print_message( PRG_NAME, "going to ignore the following " + removed.size.to_s + " redundant sequences:" )
130                     removed.each { | seq_name |
131                         puts seq_name
132                     }
133                     Util.print_message( PRG_NAME, "will process " + msa.get_number_of_seqs.to_s + " non redundant sequences" )
134                 end
135             end
136
137             lf = File.open( map_file, "a" )
138             for i in 0 ... msa.get_number_of_seqs
139                 seq  = msa.get_sequence( i )
140             end
141
142             io = MsaIO.new()
143             w = nil
144             if ( fasta_like )
145                 w = FastaWriter.new()
146             else
147                 w = PhylipSequentialWriter.new()
148             end
149             w.set_max_name_length( 10 )
150             w.clean( true )
151             begin
152                 io.write_to_file( msa, output, w )
153             rescue Exception => e
154                 Util.fatal_error( PRG_NAME, "failed to write file: " + e.to_s )
155             end
156             lf.close()
157             if ( @taxonomies.length > 0 )
158                 Util.print_message( PRG_NAME, "number of unique taxonomies: " + @taxonomies.length.to_s )
159             end
160             Util.print_message( PRG_NAME, "wrote: " + map_file )
161             Util.print_message( PRG_NAME, "wrote: " + output )
162             Util.print_message( PRG_NAME, "OK" )
163         end
164
165     end # class 
166
167 end # module Evoruby