class TaxonomyProcessor
# Program identification constants: name, date, description, version,
# copyright, contact address and web page. PRG_NAME is passed as the first
# argument to the Util message helpers further down in this file.
# NOTE(review): PRG_DATE and PRG_VERSION each appear twice, once on a line
# marked with a minus sign and once on a line marked with a plus sign; this
# looks like diff markup (old value, new value) - confirm before applying.
PRG_NAME = "tap"
- PRG_DATE = "2013.03.20"
+ PRG_DATE = "130411"
PRG_DESC = "replacement of species names in multiple sequence files"
- PRG_VERSION = "2.00"
+ PRG_VERSION = "2.002"
COPYRIGHT = "2013 Christian M Zmasek"
CONTACT = "phylosoft@gmail.com"
WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"
Util.fatal_error( PRG_NAME, "failed to write file: " + e.to_s )
end
lf.close()
- if ( @taxonomies.length > 0 )
- Util.print_message( PRG_NAME, "number of unique taxonomies: " + @taxonomies.length.to_s )
- end
Util.print_message( PRG_NAME, "wrote: " + list_file )
Util.print_message( PRG_NAME, "wrote: " + output )
Util.print_message( PRG_NAME, "OK" )
# Builds a short replacement name for one description string, writes the
# mapping from the new name to the description into file, and returns the
# new name.
#
# NOTE(review): this text appears to be a diff. Lines starting with a minus
# sign look like the removed version and lines starting with a plus sign look
# like the added version. The comments below describe the added version.
#
# desc             - description string; it is modified in place by gsub!
# counter          - value rendered in base 16 through to_s( 16 ); presumably
#                    an Integer - confirm against the caller
# file             - object that responds to print; receives one mapping line
# extract_taxonomy - when truthy, a bracketed taxonomy code must be present
#                    in desc
#
# Returns new_desc: the base 16 counter, followed by an underscore and the
# captured code when extract_taxonomy is truthy.
def modify_name( desc, counter, file, extract_taxonomy )
new_desc = nil
- desc.gsub!( /:\s+/, ":" )
- desc.gsub!( /\s+/, " " )
- if desc =~ /^>?\s*\S{1,10}_([A-Z]{3,5})/
- new_desc = counter.to_s( 16 ) + "_" + $1
- elsif extract_taxonomy
- if desc =~/\s\[([A-Z0-9]{3,5})\]\b/
# Collapse every run of whitespace into a single space. This mutates the
# string object that the caller passed in.
+ desc.gsub!( /\s+/, ' ' )
+ #if desc =~ /^>?\s*\S{1,10}_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|CAP)/
+ # new_desc = counter.to_s( 16 ) + "_" + $1
+ if extract_taxonomy
# The code must sit in square brackets preceded by a whitespace character.
# Accepted codes: five characters (first one A-Z or 9, then two letters,
# then two letters or digits), or one of RAT, PIG, PEA, CAP.
# $1 refers to the outer capture group, that is, the whole code.
+ if desc =~/\s\[(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|CAP)\]/
new_desc = counter.to_s( 16 ) + "_" + $1
else
- Util.fatal_error( PRG_NAME, "illegal format in: " + desc )
# NOTE(review): presumably Util.fatal_error terminates the program. If it
# returned, new_desc would still be nil and the concatenation in the
# file.print call below would fail - confirm.
+ Util.fatal_error( PRG_NAME, "could not get taxonomy from: " + desc )
end
else
# Without taxonomy extraction the new name is just the base 16 counter.
new_desc = counter.to_s( 16 )
end
- if new_desc == nil
- Util.fatal_error( PRG_NAME, "failed to extract species from: " + desc )
- end
- file.print( new_desc + ": " + desc + "\n" )
# Mapping line: new name, a tab, the whitespace-collapsed description, and a
# newline.
+ file.print( new_desc + "\t" + desc + "\n" )
new_desc
end