1 #!/usr/local/bin/ruby -w
3 # = exe/test - Test class
5 # Copyright:: Copyright (C) 2006-2007 Christian M. Zmasek
6 # License:: GNU Lesser General Public License (LGPL)
8 # $Id: test.rb,v 1.18 2010/10/08 22:04:17 cmzmasek Exp $
10 # last modified: 05/15/2007
13 require 'lib/evo/util/constants'
14 require 'lib/evo/taxonomy/taxonomy'
15 require 'lib/evo/sequence/sequence'
16 require 'lib/evo/msa/msa'
17 require 'lib/evo/msa/msa_factory'
18 require 'lib/evo/sequence/domain_structure'
19 require 'lib/evo/sequence/protein_domain'
20 require 'lib/evo/table/basic_table'
21 require 'lib/evo/io/msa_io'
22 require 'lib/evo/io/writer/phylip_sequential_writer'
23 require 'lib/evo/io/writer/nexus_writer'
24 require 'lib/evo/io/writer/fasta_writer'
25 require 'lib/evo/io/parser/fasta_parser'
26 require 'lib/evo/io/parser/ncbi_tseq_parser'
27 require 'lib/evo/io/parser/hmmsearch_domain_extractor'
28 require 'lib/evo/tool/domain_sequence_extractor'
29 require 'lib/evo/tool/hmmscan_parser'
30 require 'lib/evo/tool/domains_to_forester'
31 require 'lib/evo/io/parser/general_msa_parser'
32 require 'lib/evo/io/parser/basic_table_parser'
33 require 'lib/evo/util/command_line_arguments'
34 require 'lib/evo/soft/fastme'
35 require 'lib/evo/soft/tree_puzzle'
# Locations of the fixture files read by the parser tests. The paths are
# relative; the parser tests prepend path_to_evoruby (see the
# create_msa_from_file calls) before opening them.
# GENERAL_MSA_FILE is passed to create_msa_from_file in test_general_msa_parser.
43 GENERAL_MSA_FILE = "files/test/general_msa_file.txt"
# FASTA-format fixture (presumably consumed by test_fasta_parser -- the
# argument line of that call is not visible here; confirm).
44 FASTA_FILE = "files/test/fasta_file.txt"
# NCBI TSeq XML fixture (presumably consumed by test_ncbi_tseq_parser -- confirm).
45 TSEQ_FILE = "files/test/ncbi_tseq.xml"
56 tax = Taxonomy.new( "pig" )
58 if tax.get_name != "pig"
62 tax1 = Taxonomy.new( "dog", "id", "source" )
65 if tax2.get_name != "dog"
68 if tax2.get_id != "id"
71 if tax2.get_id_source != "source"
83 tax3 = Taxonomy.new( "dog", "id" )
88 tax4 = Taxonomy.new( "dog" )
89 tax5 = Taxonomy.new( "dog" )
106 seq = Sequence.new( "seq1", "WLIQ" )
107 if ( seq.get_length != 4 )
110 if ( seq.get_residue( 3 ) != "Q" )
113 seq.append!( "E?-*X_Y" )
114 if ( seq.get_length != 11 )
117 if ( seq.get_residue( 3 ) != "Q" )
120 if ( seq.get_residue( 4 ) != "E" )
123 seq.append!( "A V_" )
124 if ( seq.get_length != 15 )
127 if ( !Test::same?( seq.get_gap_length, 5 ) )
130 if ( !Test::same?( seq.get_gap_ratio, 5.0 / 15.0 ) )
133 seq.delete_residue!( 0 )
134 seq.delete_residue!( 2 )
136 seq.delete_residue!( 0 )
137 seq.delete_residue!( 0 )
139 if ( seq2.get_length != 13 )
142 if ( seq2.get_sequence_as_string != "LIE?-*X_YA V_" )
145 if ( seq2.get_slice( 2, 2 ) != "E?" )
148 if ( seq2.get_slice( 0, 1 ) != "L" )
151 if ( seq2.get_subsequence( 1, 4 ).get_sequence_as_string != "IE?-" )
154 if ( seq2.get_name() != "seq1" )
157 if ( seq2.get_slice!( 2, 2 ) != "E?" )
160 if ( seq2.get_sequence_as_string != "LI-*X_YA V_" )
163 if ( seq2.get_length != 11 )
166 if ( seq2.get_character_code( 0 ) != 76 )
169 str_0 = " Li-*X_YA V_ 3 3 1212 ?? B1J OU.Z "
170 if ( Util.clean_seq_str( str_0 ) != "LI-X-YAV-XXXXXX-X" )
174 tax = Taxonomy.new( "dog", "tax_id", "tax_source" )
175 seqn = Sequence.new( "seqn", "VVVVV", "acc", "acc source", tax, "symbol", "2accession", "2source" )
177 if ( seqc.get_name() != "seqn" )
180 if ( seqc.get_accession() != "acc" )
183 if ( seqc.get_accession_source() != "acc source" )
186 if ( seqc.get_taxonomy.get_name != "dog" )
189 if ( seqc.get_taxonomy.get_id != "tax_id" )
192 if ( seqc.get_symbol != "symbol" )
195 if ( seqc.get_secondary_accession != "2accession" )
198 if ( seqc.get_secondary_accession_source != "2source" )
202 rescue Exception => e
215 seq0 = Sequence.new( "seq 0", "a-*-_ x-ijklmnopqrstuvwxyz" )
216 seq1 = Sequence.new( "seq 1", "ab--_ X-ijklmnopqrstuvwxyz" )
217 seq2 = Sequence.new( "seq 2", "abc-_?x-ijklmnopqrstuvwxyz" )
218 seq3 = Sequence.new( "seq 3", "abcd_?x-ijklmnopqrstuvwxyz" )
219 seq4 = Sequence.new( "seq 4", "abcde?x-ijklmnopqrstuvwxyz" )
220 seq5 = Sequence.new( "seq 5", "abcdefx-ijklmnopqrstuvwxyz" )
221 msa.add_sequence( seq0 );
222 msa.add_sequence( seq1 );
223 msa.add_sequence( seq2 );
224 msa.add_sequence( seq3 );
225 msa.add_sequence( seq4 );
226 msa.add_sequence( seq5 );
227 msa.add( "seq 6", "abcdefg-ijklmnopqrstuvwxyz" );
228 if ( msa.get_sequence( 0 ).get_name() != "seq 0" )
231 if ( msa.get_by_name( "Eq 1", false, true ).get_name != "seq 1" )
234 if ( msa.find_by_name( "Eq 2", false, true )[ 0 ] != 2 )
237 if ( !msa.is_aligned )
240 if ( msa.get_number_of_seqs != 7 )
243 if ( msa.get_length != 26 )
246 msa.add( "seq 7", "abcdefgqijklmnopqrstuvwxyz" );
247 if ( msa.get_number_of_seqs != 8 )
250 msa.remove_sequence!( 7 )
251 if ( msa.get_number_of_seqs != 7 )
254 msa.remove_gap_only_columns!()
255 if ( msa.get_length() != 25 )
258 if ( msa.get_by_name( "seq 0" ).get_sequence_as_string != "a-*-_ xijklmnopqrstuvwxyz" )
261 msa.remove_gap_columns_w_gap_ratio!( 6.1 / 7.0 )
262 if ( msa.get_length() != 25 )
265 msa.remove_gap_columns_w_gap_ratio!( 6.0 / 7.0 )
266 if ( msa.get_length() != 25 )
269 if ( msa.get_by_name( "seq 0" ).get_sequence_as_string != "a-*-_ xijklmnopqrstuvwxyz" )
272 msa.remove_gap_columns_w_gap_ratio!( 5.0 / 7.0 )
273 if ( msa.get_length() != 25 )
276 if ( msa.get_by_name( "seq 0" ).get_sequence_as_string != "a-*-_ xijklmnopqrstuvwxyz" )
279 msa.remove_gap_columns_w_gap_ratio!( 2.0 / 7.0 )
280 if ( msa.get_length() != 23 )
283 if ( msa.get_by_name( "seq 0" ).get_sequence_as_string != "a-* xijklmnopqrstuvwxyz" )
284 puts msa.get_by_name( "seq 0" ).get_sequence_as_string
287 msa.remove_gap_columns_w_gap_ratio!( 1.0 / 7.0 )
288 if ( msa.get_length() != 21 )
291 if ( msa.get_by_name( "seq 0" ).get_sequence_as_string != "a-xijklmnopqrstuvwxyz" )
294 msa2 = Evoruby::Msa.new()
295 msa2.add( "seq0", "abcdefgh" );
296 msa2.add( "seq1", "a-cdefgh" );
297 msa2.add( "seq2", "a--defgh" );
298 msa2.add( "seq3", "a---efgh" );
299 msa2.add( "seq4", "a----fgh" );
300 msa2.add( "seq5", "a" );
301 if ( msa2.is_aligned )
304 msa2.remove_sequence!( 5 )
305 if ( !msa2.is_aligned )
308 if ( msa2.get_number_of_seqs != 5 )
311 msa2.remove_gap_only_columns!()
313 if ( msa2.get_length != 8 )
317 msa2.remove_sequences_by_gap_ratio!( 4.0 / 8.0 )
318 if ( msa2.get_number_of_seqs != 5 )
321 msa2.remove_sequences_by_gap_ratio!( 3.0 / 8.0 )
322 if ( msa2.get_number_of_seqs != 4 )
325 msa2.remove_sequences_by_gap_ratio!( 1.0 / 8.0 )
326 if ( msa2.get_number_of_seqs != 2 )
329 msa2.remove_sequences_by_gap_ratio!( 0.0 )
330 if ( msa2.get_number_of_seqs != 1 )
333 msa2.add( "seq1", "a-cdefgh" );
334 msa2.add( "seq2", "a--defgh" );
335 msa2.add( "seq3", "a---efgh" );
336 msa2.add( "seq4", "a----fgh" );
338 msa2.remove_sequences_by_non_gap_length!( 4 )
339 if ( msa2.get_number_of_seqs != 5 )
342 msa2.remove_sequences_by_non_gap_length!( 5 )
343 if ( msa2.get_number_of_seqs != 4 )
346 msa2.remove_sequences_by_non_gap_length!( 8 )
347 if ( msa2.get_number_of_seqs != 1 )
350 msa2.add( "seq1", "a-cdefgh" );
351 msa2.add( "seq2", "a--defgh" );
352 msa2.add( "seq3", "a---efgh" );
353 msa2.add( "seq4", "a----fgh" );
355 if ( msa2.get_by_name( "seq0" ).get_sequence_as_string != "abcdefgh" )
359 if ( msa2.get_by_name( "seq0" ).get_sequence_as_string != "de" )
362 msa3 = Evoruby::Msa.new()
363 msa3.add( "seq0", "abcdefgh-abcdef--*" );
364 msa3.add( "seq1", "b-deefgh-a____f--*" );
365 msa3.add( "seq2", "A________abcdef--*" );
366 msa3.add( "seq3", "A Efgh---------*" );
367 msa3.add( "seq4", " eFhh---------*" );
368 msa3.add( "seq5", "----------------ee" );
369 if ( !Test::same?( msa3.calculate_overlap( 0, 0 ), 14 ) )
372 if ( !Test::same?( msa3.calculate_overlap( 0, 1 ), 9 ) )
375 if ( !Test::same?( msa3.calculate_overlap( 0, 5 ), 0 ) )
378 if ( !Test::same?( msa3.calculate_overlap( 4, 5 ), 0 ) )
381 if ( !msa3.overlap?( 2, 3 ) )
384 if ( msa3.overlap?( 2, 3, 2 ) )
387 if ( msa3.overlap?( 4, 5 ) )
390 if ( !Test::same?( msa3.calculate_identities( 4, 5 ), 0 ) )
393 if ( !Test::same?( msa3.calculate_identities( 3, 4 ), 3 ) )
396 if ( msa3.split_into_overlapping_msa.length != 3 )
399 if ( msa3.split_into_overlapping_msa( 5 ).length != 4 )
405 seq0 = Sequence.new( "seq 0", "ABCDED" )
406 seq1 = Sequence.new( "seq 1", "ABCDEE" )
407 seq2 = Sequence.new( "seq 2", "abcded" )
408 seq3 = Sequence.new( "seq 3", " ABCDEE" )
409 seq4 = Sequence.new( "seq 4", "ABCDEV" )
410 seq5 = Sequence.new( "seq 5", "ABCDED" )
411 seq6 = Sequence.new( "seq 6", "AB.DEI" )
412 seq7 = Sequence.new( "seq 7", "aB-DEi*" )
413 seq8 = Sequence.new( "seq 8", "ABCDED" )
414 seq9 = Sequence.new( "seq 9", "ABCDED" )
415 seq10 = Sequence.new( "seq 10", "ABCDED" )
416 seq11 = Sequence.new( "seq 11", "ABCDED" )
417 msa4.add_sequence( seq0 );
418 msa4.add_sequence( seq1 );
419 msa4.add_sequence( seq2 );
420 msa4.add_sequence( seq3 );
421 msa4.add_sequence( seq4 );
422 msa4.add_sequence( seq5 );
423 msa4.add_sequence( seq6 );
424 msa4.add_sequence( seq7 );
425 msa4.add_sequence( seq8 );
426 msa4.add_sequence( seq9 );
427 msa4.add_sequence( seq10 );
428 msa4.add_sequence( seq11 );
430 msa4.remove_redundant_sequences!
434 if msa4.get_number_of_seqs != 4
438 if msa4.get_sequence( 0 ).get_name != "seq 0"
441 if msa4.get_sequence( 1 ).get_name != "seq 1"
444 if msa4.get_sequence( 2 ).get_name != "seq 4"
447 if msa4.get_sequence( 3 ).get_name != "seq 6"
451 rescue Exception => e
460 def test_msa_factory()
463 rescue Exception => e
472 def test_domain_structure()
474 ds = DomainStructure.new( 190 )
475 rescue Exception => e
484 def test_protein_domain()
486 ds = ProteinDomain.new( "domain", 23, 466, "d1", 0.4 )
487 rescue Exception => e
496 def test_basic_table()
499 t.set_value( 233, 923, "snake" )
500 t.set_value( 233, 923, "lizard" )
501 if ( t.get_value_as_string( 233, 923 ) != "lizard" )
504 if ( t.get_value_as_string( 33, 23 ) != "" )
507 rescue Exception => e
519 rescue Exception => e
528 def test_phylip_sequentialwriter()
530 p = PhylipSequentialWriter.new()
531 rescue Exception => e
540 def test_nexus_writer()
542 n = NexusWriter.new()
543 rescue Exception => e
552 def test_fasta_writer()
554 f = FastaWriter.new()
555 rescue Exception => e
564 def test_general_msa_parser( path_to_evoruby )
566 g = GeneralMsaParser.new()
569 if ( !Util::is_string_empty?( path_to_evoruby ) )
570 sep = Constants::FILE_SEPARATOR
572 msa = f.create_msa_from_file( path_to_evoruby +
574 GENERAL_MSA_FILE, g )
576 if ( msa.get_length() != 29 )
579 if ( msa.get_number_of_seqs() != 7 )
583 seq0 = msa.get_sequence( 0 )
584 seq1 = msa.get_sequence( 1 )
585 seq2 = msa.get_sequence( 2 )
586 seq3 = msa.get_sequence( 3 )
587 seq4 = msa.get_sequence( 4 )
588 seq5 = msa.get_sequence( 5 )
589 seq6 = msa.get_sequence( 6 )
591 if ( seq0.get_name() != "sequence0" )
594 if ( seq0.get_sequence_as_string() != "ABCDE.GHIJKLMNOPQR.TUVWabcxy0" )
598 if ( seq1.get_name() != "sequence1" )
601 if ( seq1.get_sequence_as_string() != "abcdefghijklmnopqrstuvwabcxy1" )
605 if ( seq2.get_name() != "sequence2" )
608 if ( seq2.get_sequence_as_string() != "abcdefghijkl---x_-*?_XXabcxy2" )
612 if ( seq3.get_name() != "sequence3" )
615 if ( seq3.get_sequence_as_string() != "12345678901234567890123abcxy3" )
619 if ( seq4.get_name() != "sequence4" )
622 if ( seq4.get_sequence_as_string() != "--------------------------xy4" )
626 if ( seq5.get_name() != "sequence5" )
629 if ( seq5.get_sequence_as_string() != "a*c*ef****************wabcxy5" )
633 if ( seq6.get_name() != "sequence6" )
636 if ( seq6.get_sequence_as_string() != "ururufhfghfgftgfhftgfttabcxy6" )
640 rescue Exception => e
649 def test_fasta_parser( path_to_evoruby )
651 fasta = FastaParser.new()
654 if ( !Util::is_string_empty?( path_to_evoruby ) )
655 sep = Constants::FILE_SEPARATOR
657 msa = f.create_msa_from_file( path_to_evoruby +
661 if ( msa.get_length() != 6 )
664 if ( msa.get_number_of_seqs() != 4 )
668 seq0 = msa.get_sequence( 0 )
669 seq1 = msa.get_sequence( 1 )
670 seq2 = msa.get_sequence( 2 )
671 seq3 = msa.get_sequence( 3 )
673 if ( seq0.get_name() != "sequence 0" )
676 if ( seq0.get_sequence_as_string() != "ABCDEF" )
680 if ( seq1.get_name() != "sequence 1" )
683 if ( seq1.get_sequence_as_string() != "abcdef" )
687 if ( seq2.get_name() != "sequence 2" )
690 if ( seq2.get_sequence_as_string() != "123456" )
693 if ( seq3.get_name() != "sequence 3" )
696 if ( seq3.get_sequence_as_string() != "a-c--f" )
700 rescue Exception => e
709 def test_ncbi_tseq_parser( path_to_evoruby )
711 parser = NcbiTSeqParser.new
714 if ( !Util::is_string_empty?( path_to_evoruby ) )
715 sep = Constants::FILE_SEPARATOR
717 msa = f.create_msa_from_file( path_to_evoruby +
721 if ( msa.get_number_of_seqs() != 9 )
725 seq0 = msa.get_sequence( 0 )
726 seq1 = msa.get_sequence( 1 )
727 seq8 = msa.get_sequence( 8 )
729 if ( seq0.get_name() != "SusD [Bacteroides thetaiotaomicron VPI-5482]" )
732 if ( seq0.get_sequence_as_string() != "MKTKYIKQLFSAALIAVLSSGVTSCINDLDISPIDPQTGGSFDQQGVFVKGYAMLGVTGQKGIDGSPDLDGQDEGESGFYRTTFNCNELPTDECLWAWQENQDIPQLTSISWSPSSQRTEWVYVRLGYDITQYNFFLDQTEGMTDAETLRQRAEIRFLRALHYWYFLDLFGKAPFKEHFSNDLPVEKKGTELYTYIQNELNEIEADMYEPRQAPFGRADKAANWLLRARLYLNAGVYTGQTDYAKAEEYASKVIGSAYKLCTNYSELFMADNDENENAMQEIILPIRQDGVKTRNYGGSTYLVCGTRVAGMPRMGTTNGWSCIFARAAMVQKFFSNLEDVPMLPADVEIPTKGLDTDEQIDAFDAEHGIRTEDMIKAAGDDRALLYSGVGGGRRKIQTDAISGFTDGLSIVKWQNYRSDGKPVSHATYPDTDIPLFRLAEAYLTRAEAIFRQGGDATGDINELRKRANCTRKVQTVTEQELIDEWAREFYLEGRRRSDLVRFGMFTTNKYLWDWKGGAMNGTSVASYYNKYPIPVSDINNNRNMSQNEGYK" )
735 if ( seq0.get_accession != "29341016" )
738 if ( seq0.get_accession_source != "gi" )
741 if ( seq0.get_taxonomy.get_name != "Bacteroides thetaiotaomicron VPI-5482" )
744 if ( seq0.get_taxonomy.get_id != "226186" )
747 if ( seq0.get_taxonomy.get_id_source != "ncbi" )
752 if ( seq1.get_name() != "SusD, outer membrane protein [Bacteroides thetaiotaomicron VPI-5482]" )
755 if ( seq1.get_accession != "29349109" )
758 if ( seq1.get_accession_source != "gi" )
761 if ( seq1.get_taxonomy.get_name != "Bacteroides thetaiotaomicron VPI-5482" )
764 if ( seq1.get_taxonomy.get_id != "226186" )
767 if ( seq1.get_taxonomy.get_id_source != "ncbi" )
772 if ( seq8.get_name() != "Chain A, B. Thetaiotaomicron Susd With Maltotriose" )
775 if ( seq8.get_accession != "pdb|3CKB|A" )
778 if ( seq8.get_accession_source != "ncbi" )
781 if ( seq8.get_taxonomy.get_name != "Bacteroides thetaiotaomicron" )
784 if ( seq8.get_taxonomy.get_id != "818" )
787 if ( seq8.get_taxonomy.get_id_source != "ncbi" )
791 rescue Exception => e
800 def test_hmmsearch_domain_extractor()
802 h = Evoruby::HmmsearchDomainExtractor.new()
803 rescue Exception => e
812 def test_domain_sequence_extractor()
814 h = Evoruby::DomainSequenceExtractor.new()
815 rescue Exception => e
824 def test_hmmscan_parser()
826 h = Evoruby::HmmscanParser.new()
827 rescue Exception => e
836 def test_domains_to_forester()
838 d = Evoruby::DomainsToForester.new()
839 rescue Exception => e
849 def test_basic_table_parser()
851 b = Evoruby::BasicTableParser.new()
852 rescue Exception => e
864 cla = CommandLineArguments.new( Array.new )
865 rescue Exception => e
874 def test_tree_puzzle()
876 tp = TreePuzzle.new()
877 tp.run( '/home/czmasek/scratch/small.aln',
881 rescue Exception => e
892 fastme = FastMe.new()
893 fastme.run( '/home/czmasek/scratch/outdist', 0, :GME )
894 rescue Exception => e
909 puts "ruby version " + RUBY_VERSION
910 puts Constants::EVORUBY + " version " + Constants::EVORUBY_VERSION
913 path_to_evoruby = Test.get_path_to_evoruby()
915 if ( Util.is_string_empty?( path_to_evoruby ) )
918 puts( "Warning! Path to evoruby could not be established. Some tests will might fail." )
922 print( "--- Taxonomy: " )
923 if ( test_taxonomy() )
931 print( "--- Sequence: " )
932 if ( test_sequence() )
949 print( "--- MsaFactory: " )
950 if ( test_msa_factory() )
958 print( "--- DomainStructure: " )
959 if ( test_domain_structure() )
967 print( "--- ProteinDomain: " )
968 if ( test_protein_domain() )
976 print( "--- BasicTable: " )
977 if ( test_basic_table() )
985 print( "--- MsaIO: " )
994 print( "--- PhylipSequentialWriter: " )
995 if ( test_phylip_sequentialwriter )
1003 print( "--- FastaWriter : " )
1004 if ( test_fasta_writer )
1012 print( "--- NexusWriter: " )
1013 if ( test_nexus_writer )
1021 print( "--- FastaParser: " )
1022 if ( test_fasta_parser( path_to_evoruby ) )
1030 print( "--- NCBI Tseq parser: " )
1031 if ( test_ncbi_tseq_parser( path_to_evoruby ) )
1039 print( "--- GeneralMsaParser: " )
1040 if ( test_general_msa_parser( path_to_evoruby ) )
1049 print( "--- Hmmsearch domain extractor: " )
1050 if ( test_hmmsearch_domain_extractor )
1058 print( "--- Domain sequence extractor: " )
1059 if ( test_domain_sequence_extractor )
1067 print( "--- Hmmscan parser: " )
1068 if ( test_hmmscan_parser )
1077 print( "--- Domains 2 forester: " )
1078 if ( test_domains_to_forester )
1086 print( "--- BasicTableParser: " )
1087 if ( test_basic_table_parser )
1095 print( "--- TreePuzzle (wrapper): " )
1096 if ( test_tree_puzzle() )
1104 print( "--- FastMe (wrapper): " )
1105 if ( test_fastme() )
1115 print( "--- CLA: " )
1124 puts "ruby version " + RUBY_VERSION
1125 puts Constants::EVORUBY + " version " + Constants::EVORUBY_VERSION
1128 td = Time.at( Time.now - t0 )
1129 puts( "Time : #{ td.sec }.#{ td.usec }s" )
1132 puts( "Successful tests: " + @successes.to_s )
1133 puts( "Failed tests : " + @failures.to_s )
1135 if ( @failures < 1 )
# Tolerance-based numeric equality used by the tests above (e.g. for gap
# ratios and overlap counts) instead of an exact == comparison.
#
# n, m:: numeric values to compare
# Returns true when the absolute difference of n and m is below 0.000001,
# false otherwise.
1146 def Test.same?( n, m )
1147 return ( ( n - m ).abs < 0.000001 )
# Looks for the evoruby installation directory among the entries of the
# RUBYLIB environment variable.
#
# The visible part splits RUBYLIB on ':' and tests each entry against
# /evoruby/. What is done with a matching entry, and the value returned when
# nothing matches, is outside this excerpt -- callers treat an empty result
# as "path could not be established" (see the Util.is_string_empty? check
# in the run method).
#
# NOTE(review): ENV['RUBYLIB'] is nil when the variable is unset, in which
# case .split would raise NoMethodError unless this is guarded elsewhere --
# confirm.
1150 def Test.get_path_to_evoruby()
1151 rubylib = ENV['RUBYLIB'].split(':')
# Scan each RUBYLIB entry for one whose text contains "evoruby".
1153 rubylib.each do | path |
1154 if ( path =~ /evoruby/ )
1170 end # module Evoruby