// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.application;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
public final class decorator {
- private static final String SEQUENCE_NAME_FIELD = "s";
- private static final String TAXONOMY_CODE_FIELD = "c";
- private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn";
- private static final String DS_FILED = "d";
- private static final String SEQUENCE_ANNOTATION_DESC = "a";
- private static final String NODE_NAME_FIELD = "n";
- final static private String PICKY_OPTION = "p";
- final static private String FIELD_OPTION = "f";
- final static private String TRIM_AFTER_TILDE_OPTION = "t";
- final static private String MOVE_DOMAIN_NUMBER_OPTION = "mdn"; // Hidden expert option.
- final static private String TREE_NAME_OPTION = "pn";
- final static private String TREE_ID_OPTION = "pi";
- final static private String TREE_DESC_OPTION = "pd";
- final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn";
- final static private String PROCESS_NAME_INTELLIGENTLY_OPTION = "x";
- final static private String PROCESS_SIMILAR_TO_OPTION = "xs";
- final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c";
- final static private String ALLOW_REMOVAL_OF_CHARS_OPTION = "r";
- final static private String ADVANCED_TABLE_OPTION = "table";
- final static private String KEY_COLUMN = "k";
- final static private String VALUE_COLUMN = "v";
- final static private String MAPPING_FILE_SEPARATOR_OPTION = "s";
- final static private String MAPPING_FILE_SEPARATOR_DEFAULT = ":";
- final static private boolean USE_FIRST_SEPARATOR_ONLY = true;
- final static private String PRG_NAME = "decorator";
- final static private String PRG_VERSION = "1.11";
- final static private String PRG_DATE = "2012.09.15";
-
- private static void argumentsError() {
- System.out.println();
- System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
- + "[mapping table file] <phylogenies outfile>" );
- System.out.println();
- System.out.println( "options:" );
- System.out.println();
- System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
- System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
- System.out.println( " in phylogenies infile if not found (in map) otherwise" );
- System.out.println( " -p : picky, fails if node name not found in mapping table" );
- System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
- System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
- System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
- System.out.println();
- System.out.println();
- System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
- System.out.println();
- System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
- System.out.println( " " + SEQUENCE_ANNOTATION_DESC
- + " : sequence annotation description" );
- System.out.println( " " + DS_FILED + " : domain structure" );
- System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
- System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD
- + ": taxonomy scientific name" );
- System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" );
- System.out.println( " -k=<n> : key column in mapping table (0 based)," );
- System.out.println( " names of the node to be decorated - default is 0" );
- System.out.println( " -v=<n> : value column in mapping table (0 based)," );
- System.out.println( " data which with to decorate - default is 1" );
- System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
- + " : to extract bracketed scientific names" );
- System.out.println( " -s=<c> : column separator in mapping file, default is \""
- + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
- System.out.println( " -x : process name \"intelligently\" (only for -f=n)" );
- System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
- + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
- System.out.println( " -c : cut name after first space (only for -f=n)" );
- System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
- + " : trim node name to be replaced after tilde" );
- System.out.println();
- System.exit( -1 );
- }
+ private static final String SEQUENCE_NAME_FIELD = "s"; // -f value: sequence name
+ private static final String TAXONOMY_CODE_FIELD = "c"; // -f value: taxonomy code
+ private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn"; // -f value: taxonomy scientific name
+ private static final String DS_FILED = "d"; // -f value: domain structure (name is presumably a misspelling of DS_FIELD; kept because it is referenced elsewhere)
+ private static final String SEQUENCE_ANNOTATION_DESC = "a"; // -f value: sequence annotation description
+ private static final String NODE_NAME_FIELD = "n"; // -f value: node name
+ final static private String PICKY_OPTION = "p"; // -p: fail if a node name is not found in the mapping table
+ final static private String FIELD_OPTION = "f"; // -f=<c>: selects which field is replaced (one of the *_FIELD values above)
+ final static private String TRIM_AFTER_TILDE_OPTION = "t"; // -t: trim node name to be replaced after tilde
+ final static private String TREE_NAME_OPTION = "pn"; // -pn=<s>: name for the phylogeny
+ final static private String TREE_ID_OPTION = "pi"; // -pi=<s>: identifier for the phylogeny, in the form provider:value
+ final static private String TREE_DESC_OPTION = "pd"; // -pd=<s>: description for phylogenies
+ final static private String MIDPOINT_ROOT_OPTION = "mp"; // -mp: midpoint-root each tree before writing
+ final static private String ORDER_TREE_OPTION = "or"; // -or: order tree branches before writing
+ final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn"; // -sn: extract bracketed scientific names, e.g. [Nematostella vectensis]
+ final static private String EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION = "tc"; // -tc: extract bracketed taxonomic codes, e.g. [NEMVE]; main() rejects it together with -sn
+ final static private String PROCESS_NAME_INTELLIGENTLY_OPTION = "x"; // -x: process name "intelligently" (only for -f=n)
+ final static private String PROCESS_SIMILAR_TO_OPTION = "xs"; // -xs: like -x, and also process information after "similar to"
+ final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c"; // -c: cut name after first space (only for -f=n)
+ final static private String ALLOW_REMOVAL_OF_CHARS_OPTION = "r"; // -r=<n>: allow removing up to n trailing characters from names not found in the map
+ final static private String ADVANCED_TABLE_OPTION = "table"; // -table: table instead of one to one map
+ final static private String KEY_COLUMN = "k"; // -k=<n>: 0-based key column in the mapping table
+ final static private String VALUE_COLUMN = "v"; // -v=<n>: 0-based value column in the mapping table
+ final static private String MAPPING_FILE_SEPARATOR_OPTION = "s"; // -s=<c>: column separator in the mapping file
+ final static private char MAPPING_FILE_SEPARATOR_DEFAULT = '\t'; // tab; used as the separator unless -s overrides it
+ final static private String PRG_NAME = "decorator"; // program name used in messages and the usage text
+ final static private String PRG_VERSION = "1.14"; // printed at startup together with PRG_NAME and PRG_DATE
+ final static private String PRG_DATE = "130426"; // NOTE(review): looks like yyMMdd (2013-04-26) -- confirm
public static void main( final String args[] ) {
ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE );
allowed_options.add( decorator.VALUE_COLUMN );
allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION );
allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION );
+ allowed_options.add( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION );
allowed_options.add( decorator.TREE_NAME_OPTION );
allowed_options.add( decorator.TREE_ID_OPTION );
allowed_options.add( decorator.TREE_DESC_OPTION );
- allowed_options.add( decorator.MOVE_DOMAIN_NUMBER_OPTION );
allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION );
+ allowed_options.add( decorator.ORDER_TREE_OPTION );
+ allowed_options.add( decorator.MIDPOINT_ROOT_OPTION );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options );
}
}
final boolean picky = cla.isOptionSet( decorator.PICKY_OPTION );
- String separator = decorator.MAPPING_FILE_SEPARATOR_DEFAULT;
+ char separator = decorator.MAPPING_FILE_SEPARATOR_DEFAULT;
if ( cla.isOptionSet( decorator.MAPPING_FILE_SEPARATOR_OPTION ) ) {
if ( advanced_table ) {
argumentsError();
}
- separator = cla.getOptionValue( decorator.MAPPING_FILE_SEPARATOR_OPTION );
+ separator = cla.getOptionValueAsChar( decorator.MAPPING_FILE_SEPARATOR_OPTION );
}
int key_column = 0;
int value_column = 1;
boolean process_name_intelligently = false;
boolean process_similar_to = false;
boolean extract_bracketed_scientific_name = false;
- boolean move_domain_numbers_at_end_to_middle = false;
+ boolean extract_bracketed_tax_code = false;
boolean trim_after_tilde = false;
+ boolean order_tree = false;
+ boolean midpoint_root = false;
String tree_name = "";
String tree_id = "";
String tree_desc = "";
}
extract_bracketed_scientific_name = true;
}
+ if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ) ) {
+ if ( advanced_table ) {
+ argumentsError();
+ }
+ extract_bracketed_tax_code = true;
+ }
if ( cla.isOptionSet( decorator.KEY_COLUMN ) ) {
if ( advanced_table ) {
argumentsError();
numbers_of_chars_allowed_to_remove_if_not_found_in_map = cla
.getOptionValueAsInt( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
}
- if ( cla.isOptionSet( decorator.MOVE_DOMAIN_NUMBER_OPTION ) ) {
- move_domain_numbers_at_end_to_middle = true;
+ if ( cla.isOptionSet( decorator.MIDPOINT_ROOT_OPTION ) ) {
+ midpoint_root = true;
+ }
+ if ( cla.isOptionSet( decorator.ORDER_TREE_OPTION ) ) {
+ order_tree = true;
}
if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) {
field_str = cla.getOptionValue( decorator.FIELD_OPTION );
else if ( field_str.equals( DS_FILED ) ) {
field = FIELD.DOMAIN_STRUCTURE;
extract_bracketed_scientific_name = false;
+ extract_bracketed_tax_code = false;
}
else if ( field_str.equals( TAXONOMY_CODE_FIELD ) ) {
field = FIELD.TAXONOMY_CODE;
else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) {
field = FIELD.TAXONOMY_SCIENTIFIC_NAME;
extract_bracketed_scientific_name = false;
+ extract_bracketed_tax_code = false;
}
else {
ForesterUtil.fatalError( decorator.PRG_NAME, "unknown value for \"" + decorator.FIELD_OPTION
ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
+ " and -c option together" );
}
+ if ( extract_bracketed_scientific_name && extract_bracketed_tax_code ) {
+ argumentsError();
+ }
Phylogeny[] phylogenies = null;
try {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
if ( !advanced_table ) {
BasicTable<String> mapping_table = null;
try {
- mapping_table = BasicTableParser.parse( mapping_infile, separator, false, true );
+ mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false );
}
catch ( final Exception e ) {
ForesterUtil.fatalError( decorator.PRG_NAME,
"failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" );
}
- System.out.println( mapping_table.toString() );
if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) {
ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" );
}
if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) {
ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" );
}
+ if ( mapping_table.isEmpty() || mapping_table.getNumberOfColumns() < 1 ) {
+ ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table is empty" );
+ }
+ if ( mapping_table.getNumberOfColumns() == 1 ) {
+ ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table has only one column" );
+ }
map = mapping_table.getColumnsAsMap( key_column, value_column );
+ Iterator<Entry<String, String>> iter = map.entrySet().iterator();
+ System.out.println();
+ while ( iter.hasNext() ) {
+ Entry<String, String> e = iter.next();
+ System.out.println( e.getKey() + " => " + e.getValue() );
+ }
+ System.out.println();
}
if ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id )
|| !ForesterUtil.isEmpty( tree_desc ) ) {
phylogenies[ 0 ].setIdentifier( new Identifier( s_ary[ 1 ], s_ary[ 0 ] ) );
}
if ( !ForesterUtil.isEmpty( tree_desc ) ) {
- for( int i = 0; i < phylogenies.length; ++i ) {
- phylogenies[ i ].setDescription( tree_desc );
+ for( final Phylogeny phylogenie : phylogenies ) {
+ phylogenie.setDescription( tree_desc );
}
}
}
map,
field,
extract_bracketed_scientific_name,
+ extract_bracketed_tax_code,
picky,
cut_name_after_space,
process_name_intelligently,
process_similar_to,
numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- move_domain_numbers_at_end_to_middle,
trim_after_tilde );
}
}
catch ( final Exception e ) {
ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage() );
}
+ if ( midpoint_root || order_tree ) {
+ for( final Phylogeny phy : phylogenies ) {
+ if ( midpoint_root ) {
+ PhylogenyMethods.midpointRoot( phy );
+ }
+ if ( order_tree ) {
+ PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY );
+ }
+ }
+ }
try {
final PhylogenyWriter w = new PhylogenyWriter();
w.toPhyloXML( phylogenies, 0, phylogenies_outfile, ForesterUtil.getLineSeparator() );
ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile );
ForesterUtil.programMessage( PRG_NAME, "OK." );
}
+
+ /**
+ * Prints the command line usage summary (general options first, then the
+ * options that are only available without the table option) to standard
+ * output and terminates the JVM with exit status -1. Does not return.
+ */
+ private static void argumentsError() {
+ // The default separator is a tab character; printing it verbatim between
+ // quotes is indistinguishable from blank space, so show it as the escape "\t".
+ final String separator_default = ( decorator.MAPPING_FILE_SEPARATOR_DEFAULT == '\t' ) ? "\\t"
+ : String.valueOf( decorator.MAPPING_FILE_SEPARATOR_DEFAULT );
+ System.out.println();
+ System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
+ + "[mapping table file] <phylogenies outfile>" );
+ System.out.println();
+ System.out.println( "options:" );
+ System.out.println();
+ System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
+ System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
+ System.out.println( " in phylogenies infile if not found (in map) otherwise" );
+ System.out.println( " -p : picky, fails if node name not found in mapping table" );
+ System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
+ System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
+ System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
+ System.out.println();
+ System.out.println();
+ System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
+ System.out.println();
+ System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
+ System.out.println( " " + SEQUENCE_ANNOTATION_DESC
+ + " : sequence annotation description" );
+ System.out.println( " " + DS_FILED + " : domain structure" );
+ System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
+ System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD
+ + ": taxonomy scientific name" );
+ System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" );
+ System.out.println( " -k=<n> : key column in mapping table (0 based)," );
+ System.out.println( " names of the node to be decorated - default is 0" );
+ System.out.println( " -v=<n> : value column in mapping table (0 based)," );
+ System.out.println( " data which with to decorate - default is 1" );
+ System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
+ + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" );
+ System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION
+ + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" );
+ System.out.println( " -s=<c> : column separator in mapping file, default is \""
+ + separator_default + "\"" );
+ System.out.println( " -x : process name \"intelligently\" (only for -f=n)" );
+ System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
+ + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
+ System.out.println( " -c : cut name after first space (only for -f=n)" );
+ System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
+ + " : trim node name to be replaced after tilde" );
+ System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + " : to midpoint-root the tree" );
+ System.out.println( " -" + decorator.ORDER_TREE_OPTION + " : to order tree branches" );
+ System.out.println();
+ // Usage errors are fatal: exit with a non-zero status.
+ System.exit( -1 );
+ }
}