small clean-up.
[jalview.git] / forester / java / src / org / forester / application / decorator.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.List;
32 import java.util.Map;
33
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.io.writers.PhylogenyWriter;
36 import org.forester.phylogeny.Phylogeny;
37 import org.forester.phylogeny.data.Identifier;
38 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
39 import org.forester.phylogeny.factories.PhylogenyFactory;
40 import org.forester.tools.PhylogenyDecorator;
41 import org.forester.tools.PhylogenyDecorator.FIELD;
42 import org.forester.util.BasicTable;
43 import org.forester.util.BasicTableParser;
44 import org.forester.util.CommandLineArguments;
45 import org.forester.util.ForesterUtil;
46
47 public final class decorator {
48
49     private static final String  SEQUENCE_NAME_FIELD                    = "s";
50     private static final String  TAXONOMY_CODE_FIELD                    = "c";
51     private static final String  TAXONOMY_SCIENTIFIC_NAME_FIELD         = "sn";
52     private static final String  DS_FILED                               = "d";
53     private static final String  SEQUENCE_ANNOTATION_DESC               = "a";
54     private static final String  NODE_NAME_FIELD                        = "n";
55     final static private String  PICKY_OPTION                           = "p";
56     final static private String  FIELD_OPTION                           = "f";
57     final static private String  MOVE_DOMAIN_NUMBER_OPTION              = "mdn";       // Hidden expert option.
58     final static private String  TREE_NAME_OPTION                       = "pn";
59     final static private String  TREE_ID_OPTION                         = "pi";
60     final static private String  TREE_DESC_OPTION                       = "pd";
61     final static private String  EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn";
62     final static private String  PROCESS_NAME_INTELLIGENTLY_OPTION      = "x";
63     final static private String  PROCESS_SIMILAR_TO_OPTION              = "xs";
64     final static private String  CUT_NAME_AFTER_FIRST_SPACE_OPTION      = "c";
65     final static private String  ALLOW_REMOVAL_OF_CHARS_OPTION          = "r";
66     final static private String  ADVANCED_TABLE_OPTION                  = "table";
67     final static private String  KEY_COLUMN                             = "k";
68     final static private String  VALUE_COLUMN                           = "v";
69     final static private String  MAPPING_FILE_SEPARATOR_OPTION          = "s";
70     final static private String  MAPPING_FILE_SEPARATOR_DEFAULT         = ":";
71     final static private boolean USE_FIRST_SEPARATOR_ONLY               = true;
72     final static private String  PRG_NAME                               = "decorator";
73     final static private String  PRG_VERSION                            = "1.10";
74     final static private String  PRG_DATE                               = "2009.10.08";
75
76     private static void argumentsError() {
77         System.out.println();
78         System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
79                 + "[mapping table file] <phylogenies outfile>" );
80         System.out.println();
81         System.out.println( "options:" );
82         System.out.println();
83         System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
84         System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
85         System.out.println( "          in phylogenies infile if not found (in map) otherwise" );
86         System.out.println( " -p     : for picky, fails if node name not found in mapping table, default is off" );
87         System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
88         System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
89         System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
90         System.out.println();
91         System.out.println();
92         System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
93         System.out.println();
94         System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
95         System.out.println( "                                " + SEQUENCE_ANNOTATION_DESC
96                 + " : sequence annotation description" );
97         System.out.println( "                                " + DS_FILED + " : domain structure" );
98         System.out.println( "                                " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
99         System.out.println( "                                " + TAXONOMY_SCIENTIFIC_NAME_FIELD
100                 + ": taxonomy scientific name" );
101         System.out.println( "                                " + SEQUENCE_NAME_FIELD + " : sequence name" );
102         System.out.println( " -k=<n> : key column in mapping table (0 based)," );
103         System.out.println( "          names of the node to be decorated - default is 0" );
104         System.out.println( " -v=<n> : value column in mapping table (0 based)," );
105         System.out.println( "          data which with to decorate - default is 1" );
106         System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
107                 + "    : to extract bracketed scientific names" );
108         System.out.println( " -s=<c> : column separator in mapping file, default is \""
109                 + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
110         System.out.println( " -x     : process name \"intelligently\" (only for -f=n)" );
111         System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
112                 + "    : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
113         System.out.println( " -c     : cut name after first space (only for -f=n)" );
114         System.out.println();
115         System.exit( -1 );
116     }
117
118     public static void main( final String args[] ) {
119         ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE );
120         if ( ( args.length < 4 ) || ( args.length > 12 ) ) {
121             decorator.argumentsError();
122         }
123         CommandLineArguments cla = null;
124         try {
125             cla = new CommandLineArguments( args );
126         }
127         catch ( final Exception e ) {
128             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
129         }
130         if ( ( cla.getNumberOfNames() < 3 ) || ( cla.getNumberOfNames() > 4 ) ) {
131             decorator.argumentsError();
132         }
133         final File phylogenies_infile = cla.getFile( 0 );
134         final File mapping_infile = cla.getFile( 1 );
135         final File phylogenies_outfile = cla.getFile( 2 );
136         if ( phylogenies_outfile.exists() ) {
137             ForesterUtil.fatalError( PRG_NAME, "[" + phylogenies_outfile + "] already exists" );
138         }
139         final List<String> allowed_options = new ArrayList<String>();
140         allowed_options.add( decorator.ADVANCED_TABLE_OPTION );
141         allowed_options.add( decorator.PICKY_OPTION );
142         allowed_options.add( decorator.FIELD_OPTION );
143         allowed_options.add( decorator.PROCESS_NAME_INTELLIGENTLY_OPTION );
144         allowed_options.add( decorator.PROCESS_SIMILAR_TO_OPTION );
145         allowed_options.add( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION );
146         allowed_options.add( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
147         allowed_options.add( decorator.KEY_COLUMN );
148         allowed_options.add( decorator.VALUE_COLUMN );
149         allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION );
150         allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION );
151         allowed_options.add( decorator.TREE_NAME_OPTION );
152         allowed_options.add( decorator.TREE_ID_OPTION );
153         allowed_options.add( decorator.TREE_DESC_OPTION );
154         allowed_options.add( decorator.MOVE_DOMAIN_NUMBER_OPTION );
155         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
156         if ( dissallowed_options.length() > 0 ) {
157             ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options );
158         }
159         final boolean advanced_table = cla.isOptionSet( decorator.ADVANCED_TABLE_OPTION );
160         if ( !advanced_table ) {
161             final List<String> mandatory_options = new ArrayList<String>();
162             mandatory_options.add( decorator.FIELD_OPTION );
163             final String missing_options = cla.validateMandatoryOptionsAsString( mandatory_options );
164             if ( missing_options.length() > 0 ) {
165                 ForesterUtil.fatalError( decorator.PRG_NAME, "missing option(s): " + missing_options );
166             }
167         }
168         final boolean picky = cla.isOptionSet( decorator.PICKY_OPTION );
169         String separator = decorator.MAPPING_FILE_SEPARATOR_DEFAULT;
170         if ( cla.isOptionSet( decorator.MAPPING_FILE_SEPARATOR_OPTION ) ) {
171             if ( advanced_table ) {
172                 argumentsError();
173             }
174             separator = cla.getOptionValue( decorator.MAPPING_FILE_SEPARATOR_OPTION );
175         }
176         int key_column = 0;
177         int value_column = 1;
178         String field_str = "";
179         FIELD field = FIELD.NODE_NAME;
180         int numbers_of_chars_allowed_to_remove_if_not_found_in_map = -1;
181         boolean cut_name_after_space = false;
182         boolean process_name_intelligently = false;
183         boolean process_similar_to = false;
184         boolean extract_bracketed_scientific_name = false;
185         boolean move_domain_numbers_at_end_to_middle = false;
186         String tree_name = "";
187         String tree_id = "";
188         String tree_desc = "";
189         try {
190             if ( cla.isOptionSet( decorator.TREE_NAME_OPTION ) ) {
191                 tree_name = cla.getOptionValueAsCleanString( decorator.TREE_NAME_OPTION );
192             }
193             if ( cla.isOptionSet( decorator.TREE_ID_OPTION ) ) {
194                 tree_id = cla.getOptionValueAsCleanString( decorator.TREE_ID_OPTION );
195             }
196             if ( cla.isOptionSet( decorator.TREE_DESC_OPTION ) ) {
197                 tree_desc = cla.getOptionValueAsCleanString( decorator.TREE_DESC_OPTION );
198             }
199             if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION ) ) {
200                 if ( advanced_table ) {
201                     argumentsError();
202                 }
203                 extract_bracketed_scientific_name = true;
204             }
205             if ( cla.isOptionSet( decorator.KEY_COLUMN ) ) {
206                 if ( advanced_table ) {
207                     argumentsError();
208                 }
209                 key_column = cla.getOptionValueAsInt( decorator.KEY_COLUMN );
210             }
211             if ( cla.isOptionSet( decorator.VALUE_COLUMN ) ) {
212                 if ( advanced_table ) {
213                     argumentsError();
214                 }
215                 value_column = cla.getOptionValueAsInt( decorator.VALUE_COLUMN );
216             }
217             if ( cla.isOptionSet( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION ) ) {
218                 if ( advanced_table ) {
219                     argumentsError();
220                 }
221                 cut_name_after_space = true;
222             }
223             if ( cla.isOptionSet( decorator.PROCESS_NAME_INTELLIGENTLY_OPTION ) ) {
224                 if ( advanced_table ) {
225                     argumentsError();
226                 }
227                 process_name_intelligently = true;
228             }
229             if ( cla.isOptionSet( decorator.PROCESS_SIMILAR_TO_OPTION ) ) {
230                 if ( advanced_table ) {
231                     argumentsError();
232                 }
233                 process_similar_to = true;
234             }
235             if ( cla.isOptionSet( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION ) ) {
236                 numbers_of_chars_allowed_to_remove_if_not_found_in_map = cla
237                         .getOptionValueAsInt( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
238             }
239             if ( cla.isOptionSet( decorator.MOVE_DOMAIN_NUMBER_OPTION ) ) {
240                 move_domain_numbers_at_end_to_middle = true;
241             }
242             if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) {
243                 field_str = cla.getOptionValue( decorator.FIELD_OPTION );
244                 if ( field_str.equals( NODE_NAME_FIELD ) ) {
245                     field = FIELD.NODE_NAME;
246                 }
247                 else if ( field_str.equals( SEQUENCE_ANNOTATION_DESC ) ) {
248                     field = FIELD.SEQUENCE_ANNOTATION_DESC;
249                 }
250                 else if ( field_str.equals( DS_FILED ) ) {
251                     field = FIELD.DOMAIN_STRUCTURE;
252                     extract_bracketed_scientific_name = false;
253                 }
254                 else if ( field_str.equals( TAXONOMY_CODE_FIELD ) ) {
255                     field = FIELD.TAXONOMY_CODE;
256                 }
257                 else if ( field_str.equals( SEQUENCE_NAME_FIELD ) ) {
258                     field = FIELD.SEQUENCE_NAME;
259                 }
260                 else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) {
261                     field = FIELD.TAXONOMY_SCIENTIFIC_NAME;
262                     extract_bracketed_scientific_name = false;
263                 }
264                 else {
265                     ForesterUtil.fatalError( decorator.PRG_NAME, "unknown value for \"" + decorator.FIELD_OPTION
266                             + "\" option: \"" + field_str + "\"" );
267                 }
268             }
269         }
270         catch ( final Exception e ) {
271             ForesterUtil.fatalError( decorator.PRG_NAME, "error in command line: " + e.getMessage() );
272         }
273         if ( ( field != FIELD.NODE_NAME ) && ( cut_name_after_space || process_name_intelligently ) ) {
274             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -x or -c option without -f=n" );
275         }
276         if ( ( field != FIELD.NODE_NAME ) && process_similar_to ) {
277             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
278                     + " option without -f=n" );
279         }
280         if ( cut_name_after_space && process_name_intelligently ) {
281             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -x and -c option together" );
282         }
283         if ( process_similar_to && process_name_intelligently ) {
284             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
285                     + " and -x option together" );
286         }
287         if ( process_similar_to && cut_name_after_space ) {
288             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
289                     + " and -c option together" );
290         }
291         Phylogeny[] phylogenies = null;
292         try {
293             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
294             final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( phylogenies_infile, true );
295             phylogenies = factory.create( phylogenies_infile, pp );
296         }
297         catch ( final Exception e ) {
298             ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read phylgenies from [" + phylogenies_infile
299                     + "] [" + e.getMessage() + "]" );
300         }
301         Map<String, String> map = null;
302         if ( !advanced_table ) {
303             BasicTable<String> mapping_table = null;
304             try {
305                 mapping_table = BasicTableParser.parse( mapping_infile, separator, decorator.USE_FIRST_SEPARATOR_ONLY );
306             }
307             catch ( final Exception e ) {
308                 ForesterUtil.fatalError( decorator.PRG_NAME,
309                                          "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" );
310             }
311             if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) {
312                 ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" );
313             }
314             if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) {
315                 ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" );
316             }
317             map = mapping_table.getColumnsAsMap( key_column, value_column );
318         }
319         if ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id )
320                 || !ForesterUtil.isEmpty( tree_desc ) ) {
321             if ( ( phylogenies.length > 1 )
322                     && ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id ) ) ) {
323                 ForesterUtil.fatalError( decorator.PRG_NAME,
324                                          "attempt to set same name or id on more than one phylogeny" );
325             }
326             if ( !ForesterUtil.isEmpty( tree_name ) ) {
327                 phylogenies[ 0 ].setName( tree_name );
328             }
329             if ( !ForesterUtil.isEmpty( tree_id ) ) {
330                 final String[] s_ary = tree_id.split( ":" );
331                 phylogenies[ 0 ].setIdentifier( new Identifier( s_ary[ 1 ], s_ary[ 0 ] ) );
332             }
333             if ( !ForesterUtil.isEmpty( tree_desc ) ) {
334                 for( int i = 0; i < phylogenies.length; ++i ) {
335                     phylogenies[ i ].setDescription( tree_desc );
336                 }
337             }
338         }
339         try {
340             if ( advanced_table ) {
341                 Map<String, Map<String, String>> table = null;
342                 try {
343                     table = PhylogenyDecorator.parseMappingTable( mapping_infile );
344                 }
345                 catch ( final IOException e ) {
346                     ForesterUtil.fatalError( decorator.PRG_NAME,
347                                              "failed to read \"" + mapping_infile + "\" [" + e.getMessage() + "]" );
348                 }
349                 PhylogenyDecorator.decorate( phylogenies,
350                                              table,
351                                              picky,
352                                              numbers_of_chars_allowed_to_remove_if_not_found_in_map );
353             }
354             else {
355                 PhylogenyDecorator.decorate( phylogenies,
356                                              map,
357                                              field,
358                                              extract_bracketed_scientific_name,
359                                              picky,
360                                              cut_name_after_space,
361                                              process_name_intelligently,
362                                              process_similar_to,
363                                              numbers_of_chars_allowed_to_remove_if_not_found_in_map,
364                                              move_domain_numbers_at_end_to_middle );
365             }
366         }
367         catch ( final NullPointerException e ) {
368             ForesterUtil.unexpectedFatalError( decorator.PRG_NAME, e );
369         }
370         catch ( final Exception e ) {
371             ForesterUtil.fatalError( decorator.PRG_NAME, "failed to map [" + e + "]" );
372         }
373         try {
374             final PhylogenyWriter w = new PhylogenyWriter();
375             w.toPhyloXML( phylogenies, 0, phylogenies_outfile, ForesterUtil.getLineSeparator() );
376         }
377         catch ( final IOException e ) {
378             ForesterUtil.fatalError( decorator.PRG_NAME, "failed to write output [" + e.getMessage() + "]" );
379         }
380         System.out.println();
381         ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile );
382         ForesterUtil.programMessage( PRG_NAME, "OK." );
383     }
384 }