public final class decorator {
- private static final String SEQUENCE_NAME_FIELD = "s";
- private static final String TAXONOMY_CODE_FIELD = "c";
- private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn";
- private static final String DS_FILED = "d";
- private static final String SEQUENCE_ANNOTATION_DESC = "a";
- private static final String NODE_NAME_FIELD = "n";
- final static private String PICKY_OPTION = "p";
- final static private String FIELD_OPTION = "f";
- final static private String TRIM_AFTER_TILDE_OPTION = "t";
- final static private String MOVE_DOMAIN_NUMBER_OPTION = "mdn"; // Hidden expert option.
- final static private String TREE_NAME_OPTION = "pn";
- final static private String TREE_ID_OPTION = "pi";
- final static private String TREE_DESC_OPTION = "pd";
- final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn";
- final static private String PROCESS_NAME_INTELLIGENTLY_OPTION = "x";
- final static private String PROCESS_SIMILAR_TO_OPTION = "xs";
- final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c";
- final static private String ALLOW_REMOVAL_OF_CHARS_OPTION = "r";
- final static private String ADVANCED_TABLE_OPTION = "table";
- final static private String KEY_COLUMN = "k";
- final static private String VALUE_COLUMN = "v";
- final static private String MAPPING_FILE_SEPARATOR_OPTION = "s";
- final static private String MAPPING_FILE_SEPARATOR_DEFAULT = ": ";
- final static private String PRG_NAME = "decorator";
- final static private String PRG_VERSION = "1.11";
- final static private String PRG_DATE = "2012.09.15";
+ private static final String SEQUENCE_NAME_FIELD = "s";
+ private static final String TAXONOMY_CODE_FIELD = "c";
+ private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn";
+ private static final String DS_FILED = "d";
+ private static final String SEQUENCE_ANNOTATION_DESC = "a";
+ private static final String NODE_NAME_FIELD = "n";
+ final static private String PICKY_OPTION = "p";
+ final static private String FIELD_OPTION = "f";
+ final static private String TRIM_AFTER_TILDE_OPTION = "t";
+ final static private String TREE_NAME_OPTION = "pn";
+ final static private String TREE_ID_OPTION = "pi";
+ final static private String TREE_DESC_OPTION = "pd";
+ final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn";
+ final static private String EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION = "tc";
+ final static private String PROCESS_NAME_INTELLIGENTLY_OPTION = "x";
+ final static private String PROCESS_SIMILAR_TO_OPTION = "xs";
+ final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c";
+ final static private String ALLOW_REMOVAL_OF_CHARS_OPTION = "r";
+ final static private String ADVANCED_TABLE_OPTION = "table";
+ final static private String KEY_COLUMN = "k";
+ final static private String VALUE_COLUMN = "v";
+ final static private String MAPPING_FILE_SEPARATOR_OPTION = "s";
+ final static private String MAPPING_FILE_SEPARATOR_DEFAULT = ": ";
+ final static private String PRG_NAME = "decorator";
+ final static private String PRG_VERSION = "1.11";
+ final static private String PRG_DATE = "2012.09.15";
private static void argumentsError() {
System.out.println();
System.out.println( " -v=<n> : value column in mapping table (0 based)," );
System.out.println( " data which with to decorate - default is 1" );
System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
- + " : to extract bracketed scientific names" );
+ + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" );
+ System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION
+ + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" );
System.out.println( " -s=<c> : column separator in mapping file, default is \""
+ decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
System.out.println( " -x : process name \"intelligently\" (only for -f=n)" );
allowed_options.add( decorator.TREE_NAME_OPTION );
allowed_options.add( decorator.TREE_ID_OPTION );
allowed_options.add( decorator.TREE_DESC_OPTION );
- allowed_options.add( decorator.MOVE_DOMAIN_NUMBER_OPTION );
allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
boolean process_name_intelligently = false;
boolean process_similar_to = false;
boolean extract_bracketed_scientific_name = false;
- boolean move_domain_numbers_at_end_to_middle = false;
+ boolean extract_bracketed_tax_code = false;
boolean trim_after_tilde = false;
String tree_name = "";
String tree_id = "";
}
extract_bracketed_scientific_name = true;
}
+ if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ) ) {
+ if ( advanced_table ) {
+ argumentsError();
+ }
+ extract_bracketed_tax_code = true;
+ }
if ( cla.isOptionSet( decorator.KEY_COLUMN ) ) {
if ( advanced_table ) {
argumentsError();
numbers_of_chars_allowed_to_remove_if_not_found_in_map = cla
.getOptionValueAsInt( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
}
- if ( cla.isOptionSet( decorator.MOVE_DOMAIN_NUMBER_OPTION ) ) {
- move_domain_numbers_at_end_to_middle = true;
- }
if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) {
field_str = cla.getOptionValue( decorator.FIELD_OPTION );
if ( field_str.equals( NODE_NAME_FIELD ) ) {
else if ( field_str.equals( DS_FILED ) ) {
field = FIELD.DOMAIN_STRUCTURE;
extract_bracketed_scientific_name = false;
+ extract_bracketed_tax_code = false;
}
else if ( field_str.equals( TAXONOMY_CODE_FIELD ) ) {
field = FIELD.TAXONOMY_CODE;
else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) {
field = FIELD.TAXONOMY_SCIENTIFIC_NAME;
extract_bracketed_scientific_name = false;
+ extract_bracketed_tax_code = false;
}
else {
ForesterUtil.fatalError( decorator.PRG_NAME, "unknown value for \"" + decorator.FIELD_OPTION
ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
+ " and -c option together" );
}
+ if ( extract_bracketed_scientific_name && extract_bracketed_tax_code ) {
+ argumentsError();
+ }
Phylogeny[] phylogenies = null;
try {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
map,
field,
extract_bracketed_scientific_name,
+ extract_bracketed_tax_code,
picky,
cut_name_after_space,
process_name_intelligently,
process_similar_to,
numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- move_domain_numbers_at_end_to_middle,
trim_after_tilde );
}
}
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.forester.archaeopteryx.AptxUtil;
final Map<String, String> map,
final FIELD field,
final boolean extract_bracketed_scientific_name,
+ final boolean extract_bracketed_tax_code,
final boolean picky,
final boolean cut_name_after_space,
final boolean process_name_intelligently,
final boolean process_similar_to,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- final boolean move_domain_numbers_at_end_to_middle,
final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
PhyloXmlDataFormatException {
PhylogenyDecorator.decorate( phylogeny,
map,
field,
extract_bracketed_scientific_name,
+ extract_bracketed_tax_code,
picky,
null,
cut_name_after_space,
process_name_intelligently,
process_similar_to,
numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- move_domain_numbers_at_end_to_middle,
trim_after_tilde );
}
final Map<String, String> map,
final FIELD field,
final boolean extract_bracketed_scientific_name,
+ final boolean extract_bracketed_tax_code,
final boolean picky,
final Map<String, String> intermediate_map,
final boolean cut_name_after_space,
final boolean process_name_intelligently,
final boolean process_similar_to,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- final boolean move_domain_numbers_at_end_to_middle,
final boolean trim_after_tilde ) throws IllegalArgumentException,
PhyloXmlDataFormatException {
if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
new_value = extractBracketedScientificNames( node, new_value );
}
+ else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) {
+ new_value = extractBracketedTaxCodes( node, new_value );
+ }
switch ( field ) {
case SEQUENCE_ANNOTATION_DESC:
if ( PhylogenyDecorator.VERBOSE ) {
default:
throw new RuntimeException( "unknown field \"" + field + "\"" );
}
- if ( move_domain_numbers_at_end_to_middle && ( field != FIELD.NODE_NAME ) ) {
- node.setName( moveDomainNumbersAtEnd( node.getName() ) );
- }
}
}
else if ( picky ) {
final Map<String, String> map,
final FIELD field,
final boolean extract_bracketed_scientific_name,
+ final boolean extract_bracketed_tax_code,
final boolean picky,
final boolean cut_name_after_space,
final boolean process_name_intelligently,
final boolean process_similar_to,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- final boolean move_domain_numbers_at_end_to_middle,
final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
PhyloXmlDataFormatException {
for( int i = 0; i < phylogenies.length; ++i ) {
map,
field,
extract_bracketed_scientific_name,
+ extract_bracketed_tax_code,
picky,
cut_name_after_space,
process_name_intelligently,
process_similar_to,
numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- move_domain_numbers_at_end_to_middle,
trim_after_tilde );
}
}
final Map<String, String> map,
final FIELD field,
final boolean extract_bracketed_scientific_name,
+ final boolean extract_bracketed_tax_code,
final boolean picky,
final Map<String, String> intermediate_map,
final boolean cut_name_after_space,
final boolean process_name_intelligently,
final boolean process_similar_to,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- final boolean move_domain_numbers_at_end_to_middle,
final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
PhyloXmlDataFormatException {
for( int i = 0; i < phylogenies.length; ++i ) {
map,
field,
extract_bracketed_scientific_name,
+ extract_bracketed_tax_code,
picky,
intermediate_map,
cut_name_after_space,
process_name_intelligently,
process_similar_to,
numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- move_domain_numbers_at_end_to_middle,
trim_after_tilde );
}
}
return new_value.substring( 0, i - 1 ).trim();
}
+ /**
+  * Extracts a trailing bracketed taxonomy code (e.g. the "NEMVE" in "name [NEMVE]")
+  * from the given value, stores it as the taxonomy code of the node, and returns
+  * the value with the bracketed part removed.
+  * <p>
+  * If the value does not end with "]" or contains no "[", nothing is extracted,
+  * the node is left untouched and the value is returned unchanged.
+  *
+  * @param node      the node whose taxonomy receives the extracted code
+  * @param new_value the value to extract the bracketed code from
+  * @return the text in front of the bracketed code, trimmed; the empty string if
+  *         the whole value was the bracketed code; or new_value itself if no
+  *         well-formed trailing "[...]" group is present
+  * @throws IllegalArgumentException if the taxonomy rejects the extracted code
+  */
+ private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
+     final int i = new_value.lastIndexOf( "[" );
+     if ( ( i < 0 ) || !new_value.endsWith( "]" ) ) {
+         // No opening bracket (lastIndexOf returned -1) or no closing bracket:
+         // previously this ran into substring( 0, -2 ) and threw
+         // StringIndexOutOfBoundsException after writing a bogus code to the node.
+         return new_value;
+     }
+     final String tc = new_value.substring( i + 1, new_value.length() - 1 );
+     AptxUtil.ensurePresenceOfTaxonomy( node );
+     try {
+         node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
+     }
+     catch ( final PhyloXmlDataFormatException e ) {
+         // Keep the original exception as the cause so the reason for the rejection is not lost.
+         throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc, e );
+     }
+     if ( i == 0 ) {
+         // The value consisted of the bracketed code only; substring( 0, -1 ) would throw.
+         return "";
+     }
+     // The character directly in front of '[' is dropped as well (presumably the
+     // separating blank, as in "name [NEMVE]" -- NOTE(review): confirm this is intended
+     // for values without a separator).
+     return new_value.substring( 0, i - 1 ).trim();
+ }
+
private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
String new_name = null;
if ( PhylogenyDecorator.VERBOSE ) {
return new_name;
}
- private static String moveDomainNumbersAtEnd( final String node_name ) {
- final Matcher m = NODENAME_SEQNUMBER_TAXDOMAINNUMBER.matcher( node_name );
- if ( m.matches() ) {
- final String seq_number = m.group( 1 );
- final String tax = m.group( 2 );
- final String domain_number = m.group( 3 );
- return seq_number + "_[" + domain_number + "]_" + tax;
- }
- else {
- return node_name;
- }
- }
-
public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
throws IOException {
final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();