2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.application;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.List;
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.io.parsers.util.ParserUtils;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyMethods;
39 import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
40 import org.forester.phylogeny.data.Identifier;
41 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
42 import org.forester.phylogeny.factories.PhylogenyFactory;
43 import org.forester.tools.PhylogenyDecorator;
44 import org.forester.tools.PhylogenyDecorator.FIELD;
45 import org.forester.util.BasicTable;
46 import org.forester.util.BasicTableParser;
47 import org.forester.util.CommandLineArguments;
48 import org.forester.util.ForesterUtil;
50 public final class decorator {
52 private static final String SEQUENCE_NAME_FIELD = "s";
53 private static final String TAXONOMY_CODE_FIELD = "c";
54 private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn";
55 private static final String DS_FILED = "d";
56 private static final String SEQUENCE_ANNOTATION_DESC = "a";
57 private static final String NODE_NAME_FIELD = "n";
58 final static private String PICKY_OPTION = "p";
59 final static private String FIELD_OPTION = "f";
60 final static private String TRIM_AFTER_TILDE_OPTION = "t";
61 final static private String TREE_NAME_OPTION = "pn";
62 final static private String TREE_ID_OPTION = "pi";
63 final static private String TREE_DESC_OPTION = "pd";
64 final static private String MIDPOINT_ROOT_OPTION = "mp";
65 final static private String ORDER_TREE_OPTION = "or";
66 final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn";
67 final static private String EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION = "tc";
68 final static private String PROCESS_NAME_INTELLIGENTLY_OPTION = "x";
69 final static private String PROCESS_SIMILAR_TO_OPTION = "xs";
70 final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c";
71 final static private String ALLOW_REMOVAL_OF_CHARS_OPTION = "r";
72 final static private String ADVANCED_TABLE_OPTION = "table";
73 final static private String KEY_COLUMN = "k";
74 final static private String VALUE_COLUMN = "v";
75 final static private String MAPPING_FILE_SEPARATOR_OPTION = "s";
76 final static private char MAPPING_FILE_SEPARATOR_DEFAULT = '\t';
77 final static private String PRG_NAME = "decorator";
78 final static private String PRG_VERSION = "1.14";
79 final static private String PRG_DATE = "130426";
// Command-line entry point of the decorator tool.
//
// Visible flow: print the program banner, validate the argument count, parse the
// arguments, read phylogenies from the first file, read a mapping (simple
// key/value map or "advanced" table) from the second file, optionally set
// name/id/description on the phylogenies, decorate them via PhylogenyDecorator,
// optionally midpoint-root/order them, and write them as phyloXML to the third
// file, which must not exist beforehand.
//
// NOTE(review): this listing has lost lines (closing braces, "try {" and
// "else {" lines, the bodies of several guards, and the declarations of
// key_column and tree_id); no import of java.io.File or java.util.Map is visible
// either. Restore the method from the upstream source before compiling.
// NOTE(review): the code below relies on ForesterUtil.fatalError(...) and
// argumentsError() not returning (presumably they exit the JVM) -- confirm.
81 public static void main( final String args[] ) {
82 ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE );
// Coarse sanity check on the raw argument count before any parsing is attempted.
83 if ( ( args.length < 4 ) || ( args.length > 12 ) ) {
84 decorator.argumentsError();
86 CommandLineArguments cla = null;
88 cla = new CommandLineArguments( args );
90 catch ( final Exception e ) {
91 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
// Three or four names are accepted; the first three are used as
// phylogenies infile, mapping infile and phylogenies outfile.
93 if ( ( cla.getNumberOfNames() < 3 ) || ( cla.getNumberOfNames() > 4 ) ) {
94 decorator.argumentsError();
96 final File phylogenies_infile = cla.getFile( 0 );
97 final File mapping_infile = cla.getFile( 1 );
98 final File phylogenies_outfile = cla.getFile( 2 );
// Never overwrite an existing output file.
99 if ( phylogenies_outfile.exists() ) {
100 ForesterUtil.fatalError( PRG_NAME, "[" + phylogenies_outfile + "] already exists" );
// Whitelist of every option this program understands; anything else is fatal.
102 final List<String> allowed_options = new ArrayList<String>();
103 allowed_options.add( decorator.ADVANCED_TABLE_OPTION );
104 allowed_options.add( decorator.PICKY_OPTION );
105 allowed_options.add( decorator.FIELD_OPTION );
106 allowed_options.add( decorator.PROCESS_NAME_INTELLIGENTLY_OPTION );
107 allowed_options.add( decorator.PROCESS_SIMILAR_TO_OPTION );
108 allowed_options.add( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION );
109 allowed_options.add( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
110 allowed_options.add( decorator.KEY_COLUMN );
111 allowed_options.add( decorator.VALUE_COLUMN );
112 allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION );
113 allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION );
114 allowed_options.add( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION );
115 allowed_options.add( decorator.TREE_NAME_OPTION );
116 allowed_options.add( decorator.TREE_ID_OPTION );
117 allowed_options.add( decorator.TREE_DESC_OPTION );
118 allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION );
119 allowed_options.add( decorator.ORDER_TREE_OPTION );
120 allowed_options.add( decorator.MIDPOINT_ROOT_OPTION );
121 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
122 if ( dissallowed_options.length() > 0 ) {
123 ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options );
// The field option is mandatory only when the advanced-table mode is not selected.
125 final boolean advanced_table = cla.isOptionSet( decorator.ADVANCED_TABLE_OPTION );
126 if ( !advanced_table ) {
127 final List<String> mandatory_options = new ArrayList<String>();
128 mandatory_options.add( decorator.FIELD_OPTION );
129 final String missing_options = cla.validateMandatoryOptionsAsString( mandatory_options );
130 if ( missing_options.length() > 0 ) {
131 ForesterUtil.fatalError( decorator.PRG_NAME, "missing option(s): " + missing_options );
134 final boolean picky = cla.isOptionSet( decorator.PICKY_OPTION );
// Column separator for the mapping file; defaults to a tab unless overridden.
135 char separator = decorator.MAPPING_FILE_SEPARATOR_DEFAULT;
136 if ( cla.isOptionSet( decorator.MAPPING_FILE_SEPARATOR_OPTION ) ) {
// NOTE(review): the statement guarded by this and the following
// "if ( advanced_table )" checks is not visible in this listing.
137 if ( advanced_table ) {
140 separator = cla.getOptionValueAsChar( decorator.MAPPING_FILE_SEPARATOR_OPTION );
// Defaults for everything the remaining options can override.
143 int value_column = 1;
144 String field_str = "";
145 FIELD field = FIELD.NODE_NAME;
146 int numbers_of_chars_allowed_to_remove_if_not_found_in_map = -1;
147 boolean cut_name_after_space = false;
148 boolean process_name_intelligently = false;
149 boolean process_similar_to = false;
150 boolean extract_bracketed_scientific_name = false;
151 boolean extract_bracketed_tax_code = false;
152 boolean trim_after_tilde = false;
153 boolean order_tree = false;
154 boolean midpoint_root = false;
155 String tree_name = "";
157 String tree_desc = "";
// Phylogeny metadata options (name, identifier, description).
159 if ( cla.isOptionSet( decorator.TREE_NAME_OPTION ) ) {
160 tree_name = cla.getOptionValueAsCleanString( decorator.TREE_NAME_OPTION );
162 if ( cla.isOptionSet( decorator.TREE_ID_OPTION ) ) {
163 tree_id = cla.getOptionValueAsCleanString( decorator.TREE_ID_OPTION );
165 if ( cla.isOptionSet( decorator.TREE_DESC_OPTION ) ) {
166 tree_desc = cla.getOptionValueAsCleanString( decorator.TREE_DESC_OPTION );
// Options below each check advanced_table first before taking effect.
168 if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION ) ) {
169 if ( advanced_table ) {
172 extract_bracketed_scientific_name = true;
174 if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ) ) {
175 if ( advanced_table ) {
178 extract_bracketed_tax_code = true;
180 if ( cla.isOptionSet( decorator.KEY_COLUMN ) ) {
181 if ( advanced_table ) {
184 key_column = cla.getOptionValueAsInt( decorator.KEY_COLUMN );
186 if ( cla.isOptionSet( decorator.VALUE_COLUMN ) ) {
187 if ( advanced_table ) {
190 value_column = cla.getOptionValueAsInt( decorator.VALUE_COLUMN );
192 if ( cla.isOptionSet( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION ) ) {
193 if ( advanced_table ) {
196 cut_name_after_space = true;
198 if ( cla.isOptionSet( decorator.PROCESS_NAME_INTELLIGENTLY_OPTION ) ) {
199 if ( advanced_table ) {
202 process_name_intelligently = true;
204 if ( cla.isOptionSet( decorator.PROCESS_SIMILAR_TO_OPTION ) ) {
205 if ( advanced_table ) {
208 process_similar_to = true;
210 if ( cla.isOptionSet( decorator.TRIM_AFTER_TILDE_OPTION ) ) {
211 if ( advanced_table ) {
214 trim_after_tilde = true;
// Options that are read without an advanced_table check.
216 if ( cla.isOptionSet( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION ) ) {
217 numbers_of_chars_allowed_to_remove_if_not_found_in_map = cla
218 .getOptionValueAsInt( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
220 if ( cla.isOptionSet( decorator.MIDPOINT_ROOT_OPTION ) ) {
221 midpoint_root = true;
// NOTE(review): the body of this check is missing from the listing; presumably
// it sets order_tree, which is otherwise never assigned true here -- confirm.
223 if ( cla.isOptionSet( decorator.ORDER_TREE_OPTION ) ) {
// Translate the value of the field option into a FIELD constant; an
// unrecognised value is fatal.
226 if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) {
227 field_str = cla.getOptionValue( decorator.FIELD_OPTION );
228 if ( field_str.equals( NODE_NAME_FIELD ) ) {
229 field = FIELD.NODE_NAME;
231 else if ( field_str.equals( SEQUENCE_ANNOTATION_DESC ) ) {
232 field = FIELD.SEQUENCE_ANNOTATION_DESC;
234 else if ( field_str.equals( DS_FILED ) ) {
235 field = FIELD.DOMAIN_STRUCTURE;
// Bracket extraction is switched off again for the domain-structure field.
236 extract_bracketed_scientific_name = false;
237 extract_bracketed_tax_code = false;
239 else if ( field_str.equals( TAXONOMY_CODE_FIELD ) ) {
240 field = FIELD.TAXONOMY_CODE;
242 else if ( field_str.equals( SEQUENCE_NAME_FIELD ) ) {
243 field = FIELD.SEQUENCE_NAME;
245 else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) {
246 field = FIELD.TAXONOMY_SCIENTIFIC_NAME;
// Bracket extraction is switched off again for the scientific-name field as well.
247 extract_bracketed_scientific_name = false;
248 extract_bracketed_tax_code = false;
251 ForesterUtil.fatalError( decorator.PRG_NAME, "unknown value for \"" + decorator.FIELD_OPTION
252 + "\" option: \"" + field_str + "\"" );
256 catch ( final Exception e ) {
257 ForesterUtil.fatalError( decorator.PRG_NAME, "error in command line: " + e.getMessage() );
// Cross-option consistency checks: the name-processing options require the
// node-name field and are mutually exclusive with each other.
259 if ( ( field != FIELD.NODE_NAME ) && ( cut_name_after_space || process_name_intelligently ) ) {
260 ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -x or -c option without -f=n" );
262 if ( ( field != FIELD.NODE_NAME ) && process_similar_to ) {
263 ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
264 + " option without -f=n" );
266 if ( cut_name_after_space && process_name_intelligently ) {
267 ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -x and -c option together" );
269 if ( process_similar_to && process_name_intelligently ) {
270 ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
271 + " and -x option together" );
273 if ( process_similar_to && cut_name_after_space ) {
274 ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
275 + " and -c option together" );
// NOTE(review): the reaction to requesting both bracket extractions at once is
// not visible in this listing.
277 if ( extract_bracketed_scientific_name && extract_bracketed_tax_code ) {
// Read the input phylogenies; the parser is chosen by ParserUtils based on the file.
280 Phylogeny[] phylogenies = null;
282 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
283 final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true );
284 phylogenies = factory.create( phylogenies_infile, pp );
286 catch ( final Exception e ) {
// NOTE(review): the message text contains a typo ("phylgenies").
287 ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read phylgenies from [" + phylogenies_infile
288 + "] [" + e.getMessage() + "]" );
// Simple mode: parse the mapping file into a table and reduce it to a
// key -> value map built from the two selected columns.
290 Map<String, String> map = null;
291 if ( !advanced_table ) {
292 BasicTable<String> mapping_table = null;
294 mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false );
296 catch ( final Exception e ) {
297 ForesterUtil.fatalError( decorator.PRG_NAME,
298 "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" );
// Both column indices must lie within the parsed table.
300 if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) {
301 ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" );
303 if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) {
304 ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" );
306 map = mapping_table.getColumnsAsMap( key_column, value_column );
// Apply the phylogeny metadata. A name or id may only be set when the input does
// not hold more than one phylogeny; a description is applied to every phylogeny.
308 if ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id )
309 || !ForesterUtil.isEmpty( tree_desc ) ) {
310 if ( ( phylogenies.length > 1 )
311 && ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id ) ) ) {
312 ForesterUtil.fatalError( decorator.PRG_NAME,
313 "attempt to set same name or id on more than one phylogeny" );
// NOTE(review): phylogenies[ 0 ] is accessed without a visible check that the
// array is non-empty.
315 if ( !ForesterUtil.isEmpty( tree_name ) ) {
316 phylogenies[ 0 ].setName( tree_name );
318 if ( !ForesterUtil.isEmpty( tree_id ) ) {
// The id is split on ':'; the part after the colon is passed as the first
// Identifier argument and the part before it as the second.
// NOTE(review): nothing checks that the id contains a ':'; without one,
// s_ary[ 1 ] would throw ArrayIndexOutOfBoundsException.
319 final String[] s_ary = tree_id.split( ":" );
320 phylogenies[ 0 ].setIdentifier( new Identifier( s_ary[ 1 ], s_ary[ 0 ] ) );
322 if ( !ForesterUtil.isEmpty( tree_desc ) ) {
323 for( final Phylogeny phylogenie : phylogenies ) {
324 phylogenie.setDescription( tree_desc );
// Decorate. Advanced mode parses the mapping file into a nested table first;
// otherwise the simple map built above is used.
// NOTE(review): several arguments of both decorate(...) calls are missing from
// this listing.
329 if ( advanced_table ) {
330 Map<String, Map<String, String>> table = null;
332 table = PhylogenyDecorator.parseMappingTable( mapping_infile );
334 catch ( final IOException e ) {
335 ForesterUtil.fatalError( decorator.PRG_NAME,
336 "failed to read \"" + mapping_infile + "\" [" + e.getMessage() + "]" );
338 PhylogenyDecorator.decorate( phylogenies,
341 numbers_of_chars_allowed_to_remove_if_not_found_in_map );
344 PhylogenyDecorator.decorate( phylogenies,
347 extract_bracketed_scientific_name,
348 extract_bracketed_tax_code,
350 cut_name_after_space,
351 process_name_intelligently,
353 numbers_of_chars_allowed_to_remove_if_not_found_in_map,
// A NullPointerException is reported as an unexpected (internal) error; any
// other exception is reported with its localized message only.
357 catch ( final NullPointerException e ) {
358 ForesterUtil.unexpectedFatalError( decorator.PRG_NAME, e );
360 catch ( final Exception e ) {
361 ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage() );
// Optional post-processing of every phylogeny: midpoint rooting, then ordering
// of descendants with DESCENDANT_SORT_PRIORITY.TAXONOMY.
// NOTE(review): the guard around orderAppearance(...) is not visible here;
// presumably it tests order_tree -- confirm.
363 if ( midpoint_root || order_tree ) {
364 for( final Phylogeny phy : phylogenies ) {
365 if ( midpoint_root ) {
366 PhylogenyMethods.midpointRoot( phy );
369 PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY );
// Write all phylogenies as phyloXML to the output file and report success.
374 final PhylogenyWriter w = new PhylogenyWriter();
375 w.toPhyloXML( phylogenies, 0, phylogenies_outfile, ForesterUtil.getLineSeparator() );
377 catch ( final IOException e ) {
378 ForesterUtil.fatalError( decorator.PRG_NAME, "failed to write output [" + e.getMessage() + "]" );
380 System.out.println();
381 ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile );
382 ForesterUtil.programMessage( PRG_NAME, "OK." );
385 private static void argumentsError() {
386 System.out.println();
387 System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
388 + "[mapping table file] <phylogenies outfile>" );
389 System.out.println();
390 System.out.println( "options:" );
391 System.out.println();
392 System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
393 System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
394 System.out.println( " in phylogenies infile if not found (in map) otherwise" );
395 System.out.println( " -p : picky, fails if node name not found in mapping table" );
396 System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
397 System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
398 System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
399 System.out.println();
400 System.out.println();
401 System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
402 System.out.println();
403 System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
404 System.out.println( " " + SEQUENCE_ANNOTATION_DESC
405 + " : sequence annotation description" );
406 System.out.println( " " + DS_FILED + " : domain structure" );
407 System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
408 System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD
409 + ": taxonomy scientific name" );
410 System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" );
411 System.out.println( " -k=<n> : key column in mapping table (0 based)," );
412 System.out.println( " names of the node to be decorated - default is 0" );
413 System.out.println( " -v=<n> : value column in mapping table (0 based)," );
414 System.out.println( " data which with to decorate - default is 1" );
415 System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
416 + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" );
417 System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION
418 + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" );
419 System.out.println( " -s=<c> : column separator in mapping file, default is \""
420 + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
421 System.out.println( " -x : process name \"intelligently\" (only for -f=n)" );
422 System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
423 + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
424 System.out.println( " -c : cut name after first space (only for -f=n)" );
425 System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
426 + " : trim node name to be replaced after tilde" );
427 System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + " : to midpoint-root the tree" );
428 System.out.println( " -" + decorator.ORDER_TREE_OPTION + " : to order tree branches" );
429 System.out.println();