4f77bdec196512bbc6eb144c3f4b26c206496e50
[jalview.git] / forester / java / src / org / forester / application / decorator.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.application;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.List;
32 import java.util.Map;
33
34 import org.forester.io.parsers.PhylogenyParser;
35 import org.forester.io.parsers.util.ParserUtils;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.data.Identifier;
39 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
40 import org.forester.phylogeny.factories.PhylogenyFactory;
41 import org.forester.tools.PhylogenyDecorator;
42 import org.forester.tools.PhylogenyDecorator.FIELD;
43 import org.forester.util.BasicTable;
44 import org.forester.util.BasicTableParser;
45 import org.forester.util.CommandLineArguments;
46 import org.forester.util.ForesterUtil;
47
48 public final class decorator {
49
50     private static final String  SEQUENCE_NAME_FIELD                    = "s";
51     private static final String  TAXONOMY_CODE_FIELD                    = "c";
52     private static final String  TAXONOMY_SCIENTIFIC_NAME_FIELD         = "sn";
53     private static final String  DS_FILED                               = "d";
54     private static final String  SEQUENCE_ANNOTATION_DESC               = "a";
55     private static final String  NODE_NAME_FIELD                        = "n";
56     final static private String  PICKY_OPTION                           = "p";
57     final static private String  FIELD_OPTION                           = "f";
58     final static private String  TRIM_AFTER_TILDE_OPTION                = "t";
59     final static private String  MOVE_DOMAIN_NUMBER_OPTION              = "mdn";       // Hidden expert option.
60     final static private String  TREE_NAME_OPTION                       = "pn";
61     final static private String  TREE_ID_OPTION                         = "pi";
62     final static private String  TREE_DESC_OPTION                       = "pd";
63     final static private String  EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn";
64     final static private String  PROCESS_NAME_INTELLIGENTLY_OPTION      = "x";
65     final static private String  PROCESS_SIMILAR_TO_OPTION              = "xs";
66     final static private String  CUT_NAME_AFTER_FIRST_SPACE_OPTION      = "c";
67     final static private String  ALLOW_REMOVAL_OF_CHARS_OPTION          = "r";
68     final static private String  ADVANCED_TABLE_OPTION                  = "table";
69     final static private String  KEY_COLUMN                             = "k";
70     final static private String  VALUE_COLUMN                           = "v";
71     final static private String  MAPPING_FILE_SEPARATOR_OPTION          = "s";
72     final static private String  MAPPING_FILE_SEPARATOR_DEFAULT         = ":";
73     final static private boolean USE_FIRST_SEPARATOR_ONLY               = true;
74     final static private String  PRG_NAME                               = "decorator";
75     final static private String  PRG_VERSION                            = "1.11";
76     final static private String  PRG_DATE                               = "2012.09.15";
77
78     private static void argumentsError() {
79         System.out.println();
80         System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
81                 + "[mapping table file] <phylogenies outfile>" );
82         System.out.println();
83         System.out.println( "options:" );
84         System.out.println();
85         System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
86         System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
87         System.out.println( "          in phylogenies infile if not found (in map) otherwise" );
88         System.out.println( " -p     : picky, fails if node name not found in mapping table" );
89         System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
90         System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
91         System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
92         System.out.println();
93         System.out.println();
94         System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
95         System.out.println();
96         System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
97         System.out.println( "                                " + SEQUENCE_ANNOTATION_DESC
98                 + " : sequence annotation description" );
99         System.out.println( "                                " + DS_FILED + " : domain structure" );
100         System.out.println( "                                " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
101         System.out.println( "                                " + TAXONOMY_SCIENTIFIC_NAME_FIELD
102                 + ": taxonomy scientific name" );
103         System.out.println( "                                " + SEQUENCE_NAME_FIELD + " : sequence name" );
104         System.out.println( " -k=<n> : key column in mapping table (0 based)," );
105         System.out.println( "          names of the node to be decorated - default is 0" );
106         System.out.println( " -v=<n> : value column in mapping table (0 based)," );
107         System.out.println( "          data which with to decorate - default is 1" );
108         System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
109                 + "    : to extract bracketed scientific names" );
110         System.out.println( " -s=<c> : column separator in mapping file, default is \""
111                 + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
112         System.out.println( " -x     : process name \"intelligently\" (only for -f=n)" );
113         System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
114                 + "    : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
115         System.out.println( " -c     : cut name after first space (only for -f=n)" );
116         System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
117                 + "     : trim node name to be replaced after tilde" );
118         System.out.println();
119         System.exit( -1 );
120     }
121
122     public static void main( final String args[] ) {
123         ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE );
124         if ( ( args.length < 4 ) || ( args.length > 12 ) ) {
125             decorator.argumentsError();
126         }
127         CommandLineArguments cla = null;
128         try {
129             cla = new CommandLineArguments( args );
130         }
131         catch ( final Exception e ) {
132             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
133         }
134         if ( ( cla.getNumberOfNames() < 3 ) || ( cla.getNumberOfNames() > 4 ) ) {
135             decorator.argumentsError();
136         }
137         final File phylogenies_infile = cla.getFile( 0 );
138         final File mapping_infile = cla.getFile( 1 );
139         final File phylogenies_outfile = cla.getFile( 2 );
140         if ( phylogenies_outfile.exists() ) {
141             ForesterUtil.fatalError( PRG_NAME, "[" + phylogenies_outfile + "] already exists" );
142         }
143         final List<String> allowed_options = new ArrayList<String>();
144         allowed_options.add( decorator.ADVANCED_TABLE_OPTION );
145         allowed_options.add( decorator.PICKY_OPTION );
146         allowed_options.add( decorator.FIELD_OPTION );
147         allowed_options.add( decorator.PROCESS_NAME_INTELLIGENTLY_OPTION );
148         allowed_options.add( decorator.PROCESS_SIMILAR_TO_OPTION );
149         allowed_options.add( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION );
150         allowed_options.add( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
151         allowed_options.add( decorator.KEY_COLUMN );
152         allowed_options.add( decorator.VALUE_COLUMN );
153         allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION );
154         allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION );
155         allowed_options.add( decorator.TREE_NAME_OPTION );
156         allowed_options.add( decorator.TREE_ID_OPTION );
157         allowed_options.add( decorator.TREE_DESC_OPTION );
158         allowed_options.add( decorator.MOVE_DOMAIN_NUMBER_OPTION );
159         allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION );
160         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
161         if ( dissallowed_options.length() > 0 ) {
162             ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options );
163         }
164         final boolean advanced_table = cla.isOptionSet( decorator.ADVANCED_TABLE_OPTION );
165         if ( !advanced_table ) {
166             final List<String> mandatory_options = new ArrayList<String>();
167             mandatory_options.add( decorator.FIELD_OPTION );
168             final String missing_options = cla.validateMandatoryOptionsAsString( mandatory_options );
169             if ( missing_options.length() > 0 ) {
170                 ForesterUtil.fatalError( decorator.PRG_NAME, "missing option(s): " + missing_options );
171             }
172         }
173         final boolean picky = cla.isOptionSet( decorator.PICKY_OPTION );
174         String separator = decorator.MAPPING_FILE_SEPARATOR_DEFAULT;
175         if ( cla.isOptionSet( decorator.MAPPING_FILE_SEPARATOR_OPTION ) ) {
176             if ( advanced_table ) {
177                 argumentsError();
178             }
179             separator = cla.getOptionValue( decorator.MAPPING_FILE_SEPARATOR_OPTION );
180         }
181         int key_column = 0;
182         int value_column = 1;
183         String field_str = "";
184         FIELD field = FIELD.NODE_NAME;
185         int numbers_of_chars_allowed_to_remove_if_not_found_in_map = -1;
186         boolean cut_name_after_space = false;
187         boolean process_name_intelligently = false;
188         boolean process_similar_to = false;
189         boolean extract_bracketed_scientific_name = false;
190         boolean move_domain_numbers_at_end_to_middle = false;
191         boolean trim_after_tilde = false;
192         String tree_name = "";
193         String tree_id = "";
194         String tree_desc = "";
195         try {
196             if ( cla.isOptionSet( decorator.TREE_NAME_OPTION ) ) {
197                 tree_name = cla.getOptionValueAsCleanString( decorator.TREE_NAME_OPTION );
198             }
199             if ( cla.isOptionSet( decorator.TREE_ID_OPTION ) ) {
200                 tree_id = cla.getOptionValueAsCleanString( decorator.TREE_ID_OPTION );
201             }
202             if ( cla.isOptionSet( decorator.TREE_DESC_OPTION ) ) {
203                 tree_desc = cla.getOptionValueAsCleanString( decorator.TREE_DESC_OPTION );
204             }
205             if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION ) ) {
206                 if ( advanced_table ) {
207                     argumentsError();
208                 }
209                 extract_bracketed_scientific_name = true;
210             }
211             if ( cla.isOptionSet( decorator.KEY_COLUMN ) ) {
212                 if ( advanced_table ) {
213                     argumentsError();
214                 }
215                 key_column = cla.getOptionValueAsInt( decorator.KEY_COLUMN );
216             }
217             if ( cla.isOptionSet( decorator.VALUE_COLUMN ) ) {
218                 if ( advanced_table ) {
219                     argumentsError();
220                 }
221                 value_column = cla.getOptionValueAsInt( decorator.VALUE_COLUMN );
222             }
223             if ( cla.isOptionSet( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION ) ) {
224                 if ( advanced_table ) {
225                     argumentsError();
226                 }
227                 cut_name_after_space = true;
228             }
229             if ( cla.isOptionSet( decorator.PROCESS_NAME_INTELLIGENTLY_OPTION ) ) {
230                 if ( advanced_table ) {
231                     argumentsError();
232                 }
233                 process_name_intelligently = true;
234             }
235             if ( cla.isOptionSet( decorator.PROCESS_SIMILAR_TO_OPTION ) ) {
236                 if ( advanced_table ) {
237                     argumentsError();
238                 }
239                 process_similar_to = true;
240             }
241             if ( cla.isOptionSet( decorator.TRIM_AFTER_TILDE_OPTION ) ) {
242                 if ( advanced_table ) {
243                     argumentsError();
244                 }
245                 trim_after_tilde = true;
246             }
247             if ( cla.isOptionSet( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION ) ) {
248                 numbers_of_chars_allowed_to_remove_if_not_found_in_map = cla
249                         .getOptionValueAsInt( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
250             }
251             if ( cla.isOptionSet( decorator.MOVE_DOMAIN_NUMBER_OPTION ) ) {
252                 move_domain_numbers_at_end_to_middle = true;
253             }
254             if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) {
255                 field_str = cla.getOptionValue( decorator.FIELD_OPTION );
256                 if ( field_str.equals( NODE_NAME_FIELD ) ) {
257                     field = FIELD.NODE_NAME;
258                 }
259                 else if ( field_str.equals( SEQUENCE_ANNOTATION_DESC ) ) {
260                     field = FIELD.SEQUENCE_ANNOTATION_DESC;
261                 }
262                 else if ( field_str.equals( DS_FILED ) ) {
263                     field = FIELD.DOMAIN_STRUCTURE;
264                     extract_bracketed_scientific_name = false;
265                 }
266                 else if ( field_str.equals( TAXONOMY_CODE_FIELD ) ) {
267                     field = FIELD.TAXONOMY_CODE;
268                 }
269                 else if ( field_str.equals( SEQUENCE_NAME_FIELD ) ) {
270                     field = FIELD.SEQUENCE_NAME;
271                 }
272                 else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) {
273                     field = FIELD.TAXONOMY_SCIENTIFIC_NAME;
274                     extract_bracketed_scientific_name = false;
275                 }
276                 else {
277                     ForesterUtil.fatalError( decorator.PRG_NAME, "unknown value for \"" + decorator.FIELD_OPTION
278                             + "\" option: \"" + field_str + "\"" );
279                 }
280             }
281         }
282         catch ( final Exception e ) {
283             ForesterUtil.fatalError( decorator.PRG_NAME, "error in command line: " + e.getMessage() );
284         }
285         if ( ( field != FIELD.NODE_NAME ) && ( cut_name_after_space || process_name_intelligently ) ) {
286             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -x or -c option without -f=n" );
287         }
288         if ( ( field != FIELD.NODE_NAME ) && process_similar_to ) {
289             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
290                     + " option without -f=n" );
291         }
292         if ( cut_name_after_space && process_name_intelligently ) {
293             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -x and -c option together" );
294         }
295         if ( process_similar_to && process_name_intelligently ) {
296             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
297                     + " and -x option together" );
298         }
299         if ( process_similar_to && cut_name_after_space ) {
300             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
301                     + " and -c option together" );
302         }
303         Phylogeny[] phylogenies = null;
304         try {
305             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
306             final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true );
307             phylogenies = factory.create( phylogenies_infile, pp );
308         }
309         catch ( final Exception e ) {
310             ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read phylgenies from [" + phylogenies_infile
311                     + "] [" + e.getMessage() + "]" );
312         }
313         Map<String, String> map = null;
314         if ( !advanced_table ) {
315             BasicTable<String> mapping_table = null;
316             try {
317                 mapping_table = BasicTableParser.parse( mapping_infile, separator, false, true );
318             }
319             catch ( final Exception e ) {
320                 ForesterUtil.fatalError( decorator.PRG_NAME,
321                                          "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" );
322             }
323             System.out.println( mapping_table.toString() );
324             if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) {
325                 ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" );
326             }
327             if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) {
328                 ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" );
329             }
330             map = mapping_table.getColumnsAsMap( key_column, value_column );
331         }
332         if ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id )
333                 || !ForesterUtil.isEmpty( tree_desc ) ) {
334             if ( ( phylogenies.length > 1 )
335                     && ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id ) ) ) {
336                 ForesterUtil.fatalError( decorator.PRG_NAME,
337                                          "attempt to set same name or id on more than one phylogeny" );
338             }
339             if ( !ForesterUtil.isEmpty( tree_name ) ) {
340                 phylogenies[ 0 ].setName( tree_name );
341             }
342             if ( !ForesterUtil.isEmpty( tree_id ) ) {
343                 final String[] s_ary = tree_id.split( ":" );
344                 phylogenies[ 0 ].setIdentifier( new Identifier( s_ary[ 1 ], s_ary[ 0 ] ) );
345             }
346             if ( !ForesterUtil.isEmpty( tree_desc ) ) {
347                 for( int i = 0; i < phylogenies.length; ++i ) {
348                     phylogenies[ i ].setDescription( tree_desc );
349                 }
350             }
351         }
352         try {
353             if ( advanced_table ) {
354                 Map<String, Map<String, String>> table = null;
355                 try {
356                     table = PhylogenyDecorator.parseMappingTable( mapping_infile );
357                 }
358                 catch ( final IOException e ) {
359                     ForesterUtil.fatalError( decorator.PRG_NAME,
360                                              "failed to read \"" + mapping_infile + "\" [" + e.getMessage() + "]" );
361                 }
362                 PhylogenyDecorator.decorate( phylogenies,
363                                              table,
364                                              picky,
365                                              numbers_of_chars_allowed_to_remove_if_not_found_in_map );
366             }
367             else {
368                 PhylogenyDecorator.decorate( phylogenies,
369                                              map,
370                                              field,
371                                              extract_bracketed_scientific_name,
372                                              picky,
373                                              cut_name_after_space,
374                                              process_name_intelligently,
375                                              process_similar_to,
376                                              numbers_of_chars_allowed_to_remove_if_not_found_in_map,
377                                              move_domain_numbers_at_end_to_middle,
378                                              trim_after_tilde );
379             }
380         }
381         catch ( final NullPointerException e ) {
382             ForesterUtil.unexpectedFatalError( decorator.PRG_NAME, e );
383         }
384         catch ( final Exception e ) {
385             ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage() );
386         }
387         try {
388             final PhylogenyWriter w = new PhylogenyWriter();
389             w.toPhyloXML( phylogenies, 0, phylogenies_outfile, ForesterUtil.getLineSeparator() );
390         }
391         catch ( final IOException e ) {
392             ForesterUtil.fatalError( decorator.PRG_NAME, "failed to write output [" + e.getMessage() + "]" );
393         }
394         System.out.println();
395         ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile );
396         ForesterUtil.programMessage( PRG_NAME, "OK." );
397     }
398 }