6 # Copyright (C) 2003 Christian M. Zmasek
10 # Author: Christian M. Zmasek
11 # zmasek@genetics.wustl.edu
12 # http://www.genetics.wustl.edu/eddy/people/zmasek/
14 # Last modified 03/12/04 (Added gg)
16 # Purpose. Adds species information to a file describing a phylogenetic
17 # tree in the following format (by way of example):
18 # "((ceINX_CE33055:0.02883,cbINX_CB09748:0.02934):0.36899[&&NHX:B=100],..."
19 # ce stands for "CAEEL". The hash %SPECIES needs to be set accordingly.
# Main script body: validate the command line, read the tree from the input
# file into one string, and tag each leaf whose name starts with a key of
# %SPECIES with "[&&NHX:S=<value>]".
#
# The script accepts exactly one or two command-line arguments; any other
# count is handed to errorInCommandLine().
45 if ( @ARGV != 1 && @ARGV != 2 ) {
46 &errorInCommandLine();
# Build an output name from $outfile: drop a trailing ".nhx" and append
# "_species.nhx".
# NOTE(review): the enclosing condition is not visible here -- presumably this
# is the default used when no outfile argument is given; confirm.
53 $outfile =~ s/\.nhx$//;
54 $outfile .= "_species.nhx";
# Take the output file name from the second command-line argument.
58 $outfile = $ARGV[ 1 ];
# Abort rather than continue when the output file is reported as existing.
# NOTE(review): the guarding test is not visible in this view.
65 die "\n$0: <<$outfile>> already exists.\n\n";
# The input must have non-zero size (-s), be a plain file (-f) and look like
# a text file (-T); otherwise stop with an error.
67 unless ( ( -s $infile ) && ( -f $infile ) && ( -T $infile ) ) {
68 die "\n$0: <<$infile>> does not exist, is empty, or is not a plain textfile.\n\n";
# Open the input for reading and the output for writing.
# NOTE(review): both calls use the two-argument form of open, so the mode is
# parsed out of the string that contains the file name.
71 open( IN, "$infile" ) || die "\n$0: Cannot open file <<$infile>>: $!\n";
72 open( OUT, ">$outfile" ) || die "\n$0: Cannot create file <<$outfile>>: $!\n";
# Read the input line by line, remove all whitespace, turn every "+" into
# "_", and append the result to $intree so the tree ends up in one string.
74 while ( $return_line = <IN> ) {
75 $return_line =~ s/\s+//g;
76 $return_line =~ s/\+/_/g;
78 $intree .= $return_line;
# Walk over every (key, value) pair of %SPECIES. Per the file header, the key
# is the short name prefix (e.g. "ce") and the value the species code (e.g.
# "CAEEL"). %SPECIES itself is defined outside the lines visible here.
84 while ( ( my $short, my $long ) = each ( %SPECIES ) ) {
# Look for text that follows "(", ")" or ",", starts with $short, contains no
# "[" and runs (non-greedily) up to the next "(", ")" or ",".
# NOTE(review): $short is interpolated into the pattern as a regex, not as a
# literal string.
86 while ( $intree =~ /[(),]($short[^\[]+?)[(),]/ ) {
# $1 is the captured text (in the header's example, "name:branch_length").
88 my $name_and_length = $1;
# Report the replacement on standard output.
90 print "$name_and_length -> $name_and_length\[\&\&NHX:S=$long\]\n";
# Append the species tag to the first place in $intree that matches
# $name_and_length. A label that is directly followed by "[" cannot be
# captured again by the search pattern above, since the capture excludes "[".
# NOTE(review): $name_and_length is used as an unescaped pattern, so e.g. the
# "." in a branch length matches any character; confirm this is acceptable.
92 $intree =~ s/$name_and_length/$name_and_length\[&&NHX:S=$long\]/;
# Tell the user the run has finished.
102 print "\n\nDone!\n\n";
108 sub errorInCommandLine {
110 print "extractSpecies.pl infile [outfile]";