X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fdecorator.java;h=d4bc1e4cfeeb014ec93a34f62445c40376ab00ef;hb=8ab567b0fe466b5206e2ffd7c02d101a12252419;hp=c1a2786507e336b7967a48e600d2719254e5c7f4;hpb=493e40b0c936b65da342134da37e8b856b9b80af;p=jalview.git diff --git a/forester/java/src/org/forester/application/decorator.java b/forester/java/src/org/forester/application/decorator.java index c1a2786..d4bc1e4 100644 --- a/forester/java/src/org/forester/application/decorator.java +++ b/forester/java/src/org/forester/application/decorator.java @@ -5,7 +5,7 @@ // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved -// +// // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either @@ -15,13 +15,13 @@ // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. -// +// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; @@ -32,8 +32,11 @@ import java.util.List; import java.util.Map; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; +import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; @@ -46,74 +49,34 @@ import org.forester.util.ForesterUtil; public final class decorator { - private static final String SEQUENCE_NAME_FIELD = "s"; - private static final String TAXONOMY_CODE_FIELD = "c"; - private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn"; - private static final String DS_FILED = "d"; - private static final String SEQUENCE_ANNOTATION_DESC = "a"; - private static final String NODE_NAME_FIELD = "n"; - final static private String PICKY_OPTION = "p"; - final static private String FIELD_OPTION = "f"; - final static private String MOVE_DOMAIN_NUMBER_OPTION = "mdn"; // Hidden expert option. - final static private String TREE_NAME_OPTION = "pn"; - final static private String TREE_ID_OPTION = "pi"; - final static private String TREE_DESC_OPTION = "pd"; - final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn"; - final static private String PROCESS_NAME_INTELLIGENTLY_OPTION = "x"; - final static private String PROCESS_SIMILAR_TO_OPTION = "xs"; - final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c"; - final static private String ALLOW_REMOVAL_OF_CHARS_OPTION = "r"; - final static private String ADVANCED_TABLE_OPTION = "table"; - final static private String KEY_COLUMN = "k"; - final static private String VALUE_COLUMN = "v"; - final static private String MAPPING_FILE_SEPARATOR_OPTION = "s"; - final static private String MAPPING_FILE_SEPARATOR_DEFAULT = ":"; - final static private boolean USE_FIRST_SEPARATOR_ONLY = true; - final static private String PRG_NAME = "decorator"; - final static private String PRG_VERSION = "1.10"; - final static private String PRG_DATE = "2009.10.08"; - - private static void argumentsError() { - System.out.println(); - System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f= " - + "[mapping table file] " ); - System.out.println(); - System.out.println( "options:" ); - System.out.println(); - System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=)" ); - System.out.println( " -r= : allow to remove up to n characters from the end of the names" ); - System.out.println( " in phylogenies infile if not found (in map) otherwise" ); - System.out.println( " -p : for picky, fails if node name not found in mapping table, default is off" ); - System.out.println( " -" + TREE_NAME_OPTION + "=: name for the phylogeny" ); - System.out.println( " -" + TREE_ID_OPTION + "=: identifier for the phylogeny (in the form provider:value)" ); - System.out.println( " -" + TREE_DESC_OPTION + "=: description for phylogenies" ); - System.out.println(); - System.out.println(); - System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" ); - System.out.println(); - System.out.println( " -f= : field to be replaced: " + NODE_NAME_FIELD + " : node name" ); - System.out.println( " " + SEQUENCE_ANNOTATION_DESC - + " : sequence annotation description" ); - System.out.println( " " + DS_FILED + " : domain structure" ); - System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" ); - System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD - + ": taxonomy scientific name" ); - System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" ); - System.out.println( " -k= : key column in mapping table (0 based)," ); - System.out.println( " names of the node to be decorated - default is 0" ); - System.out.println( " -v= : value column in mapping table (0 based)," ); - System.out.println( " data which with to decorate - default is 1" ); - System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION - + " : to extract bracketed scientific names" ); - System.out.println( " -s= : column separator in mapping file, default is \"" - + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" ); - System.out.println( " -x : process name \"intelligently\" (only for -f=n)" ); - System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION - + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" ); - System.out.println( " -c : cut name after first space (only for -f=n)" ); - System.out.println(); - System.exit( -1 ); - } + private static final String SEQUENCE_NAME_FIELD = "s"; + private static final String TAXONOMY_CODE_FIELD = "c"; + private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn"; + private static final String DS_FILED = "d"; + private static final String SEQUENCE_ANNOTATION_DESC = "a"; + private static final String NODE_NAME_FIELD = "n"; + final static private String PICKY_OPTION = "p"; + final static private String FIELD_OPTION = "f"; + final static private String TRIM_AFTER_TILDE_OPTION = "t"; + final static private String TREE_NAME_OPTION = "pn"; + final static private String TREE_ID_OPTION = "pi"; + final static private String TREE_DESC_OPTION = "pd"; + final static private String MIDPOINT_ROOT_OPTION = "mp"; + final static private String ORDER_TREE_OPTION = "or"; + final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn"; + final static private String EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION = "tc"; + final static private String PROCESS_NAME_INTELLIGENTLY_OPTION = "x"; + final static private String PROCESS_SIMILAR_TO_OPTION = "xs"; + final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c"; + final static private String ALLOW_REMOVAL_OF_CHARS_OPTION = "r"; + final static private String ADVANCED_TABLE_OPTION = "table"; + final static private String KEY_COLUMN = "k"; + final static private String VALUE_COLUMN = "v"; + final static private String MAPPING_FILE_SEPARATOR_OPTION = "s"; + final static private char MAPPING_FILE_SEPARATOR_DEFAULT = '\t'; + final static private String PRG_NAME = "decorator"; + final static private String PRG_VERSION = "1.14"; + final static private String PRG_DATE = "130426"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE ); @@ -148,10 +111,13 @@ public final class decorator { allowed_options.add( decorator.VALUE_COLUMN ); allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION ); allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION ); + allowed_options.add( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ); allowed_options.add( decorator.TREE_NAME_OPTION ); allowed_options.add( decorator.TREE_ID_OPTION ); allowed_options.add( decorator.TREE_DESC_OPTION ); - allowed_options.add( decorator.MOVE_DOMAIN_NUMBER_OPTION ); + allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION ); + allowed_options.add( decorator.ORDER_TREE_OPTION ); + allowed_options.add( decorator.MIDPOINT_ROOT_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options ); @@ -166,12 +132,12 @@ public final class decorator { } } final boolean picky = cla.isOptionSet( decorator.PICKY_OPTION ); - String separator = decorator.MAPPING_FILE_SEPARATOR_DEFAULT; + char separator = decorator.MAPPING_FILE_SEPARATOR_DEFAULT; if ( cla.isOptionSet( decorator.MAPPING_FILE_SEPARATOR_OPTION ) ) { if ( advanced_table ) { argumentsError(); } - separator = cla.getOptionValue( decorator.MAPPING_FILE_SEPARATOR_OPTION ); + separator = cla.getOptionValueAsChar( decorator.MAPPING_FILE_SEPARATOR_OPTION ); } int key_column = 0; int value_column = 1; @@ -182,7 +148,10 @@ public final class decorator { boolean process_name_intelligently = false; boolean process_similar_to = false; boolean extract_bracketed_scientific_name = false; - boolean move_domain_numbers_at_end_to_middle = false; + boolean extract_bracketed_tax_code = false; + boolean trim_after_tilde = false; + boolean order_tree = false; + boolean midpoint_root = false; String tree_name = ""; String tree_id = ""; String tree_desc = ""; @@ -202,6 +171,12 @@ public final class decorator { } extract_bracketed_scientific_name = true; } + if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ) ) { + if ( advanced_table ) { + argumentsError(); + } + extract_bracketed_tax_code = true; + } if ( cla.isOptionSet( decorator.KEY_COLUMN ) ) { if ( advanced_table ) { argumentsError(); @@ -232,12 +207,21 @@ public final class decorator { } process_similar_to = true; } + if ( cla.isOptionSet( decorator.TRIM_AFTER_TILDE_OPTION ) ) { + if ( advanced_table ) { + argumentsError(); + } + trim_after_tilde = true; + } if ( cla.isOptionSet( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION ) ) { numbers_of_chars_allowed_to_remove_if_not_found_in_map = cla .getOptionValueAsInt( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION ); } - if ( cla.isOptionSet( decorator.MOVE_DOMAIN_NUMBER_OPTION ) ) { - move_domain_numbers_at_end_to_middle = true; + if ( cla.isOptionSet( decorator.MIDPOINT_ROOT_OPTION ) ) { + midpoint_root = true; + } + if ( cla.isOptionSet( decorator.ORDER_TREE_OPTION ) ) { + order_tree = true; } if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) { field_str = cla.getOptionValue( decorator.FIELD_OPTION ); @@ -250,6 +234,7 @@ public final class decorator { else if ( field_str.equals( DS_FILED ) ) { field = FIELD.DOMAIN_STRUCTURE; extract_bracketed_scientific_name = false; + extract_bracketed_tax_code = false; } else if ( field_str.equals( TAXONOMY_CODE_FIELD ) ) { field = FIELD.TAXONOMY_CODE; @@ -260,6 +245,7 @@ public final class decorator { else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) { field = FIELD.TAXONOMY_SCIENTIFIC_NAME; extract_bracketed_scientific_name = false; + extract_bracketed_tax_code = false; } else { ForesterUtil.fatalError( decorator.PRG_NAME, "unknown value for \"" + decorator.FIELD_OPTION @@ -288,10 +274,13 @@ public final class decorator { ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION + " and -c option together" ); } + if ( extract_bracketed_scientific_name && extract_bracketed_tax_code ) { + argumentsError(); + } Phylogeny[] phylogenies = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( phylogenies_infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true ); phylogenies = factory.create( phylogenies_infile, pp ); } catch ( final Exception e ) { @@ -302,11 +291,11 @@ public final class decorator { if ( !advanced_table ) { BasicTable mapping_table = null; try { - mapping_table = BasicTableParser.parse( mapping_infile, separator, decorator.USE_FIRST_SEPARATOR_ONLY ); + mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false ); } catch ( final Exception e ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read [" + mapping_infile + "] [" - + e.getMessage() + "]" ); + ForesterUtil.fatalError( decorator.PRG_NAME, + "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" ); } if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) { ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" ); @@ -331,8 +320,8 @@ public final class decorator { phylogenies[ 0 ].setIdentifier( new Identifier( s_ary[ 1 ], s_ary[ 0 ] ) ); } if ( !ForesterUtil.isEmpty( tree_desc ) ) { - for( int i = 0; i < phylogenies.length; ++i ) { - phylogenies[ i ].setDescription( tree_desc ); + for( final Phylogeny phylogenie : phylogenies ) { + phylogenie.setDescription( tree_desc ); } } } @@ -343,8 +332,8 @@ public final class decorator { table = PhylogenyDecorator.parseMappingTable( mapping_infile ); } catch ( final IOException e ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read \"" + mapping_infile + "\" [" - + e.getMessage() + "]" ); + ForesterUtil.fatalError( decorator.PRG_NAME, + "failed to read \"" + mapping_infile + "\" [" + e.getMessage() + "]" ); } PhylogenyDecorator.decorate( phylogenies, table, @@ -356,19 +345,30 @@ public final class decorator { map, field, extract_bracketed_scientific_name, + extract_bracketed_tax_code, picky, cut_name_after_space, process_name_intelligently, process_similar_to, numbers_of_chars_allowed_to_remove_if_not_found_in_map, - move_domain_numbers_at_end_to_middle ); + trim_after_tilde ); } } catch ( final NullPointerException e ) { ForesterUtil.unexpectedFatalError( decorator.PRG_NAME, e ); } catch ( final Exception e ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "failed to map [" + e + "]" ); + ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage() ); + } + if ( midpoint_root || order_tree ) { + for( final Phylogeny phy : phylogenies ) { + if ( midpoint_root ) { + PhylogenyMethods.midpointRoot( phy ); + } + if ( order_tree ) { + PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY ); + } + } } try { final PhylogenyWriter w = new PhylogenyWriter(); @@ -381,4 +381,52 @@ public final class decorator { ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile ); ForesterUtil.programMessage( PRG_NAME, "OK." ); } + + private static void argumentsError() { + System.out.println(); + System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f= " + + "[mapping table file] " ); + System.out.println(); + System.out.println( "options:" ); + System.out.println(); + System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=)" ); + System.out.println( " -r= : allow to remove up to n characters from the end of the names" ); + System.out.println( " in phylogenies infile if not found (in map) otherwise" ); + System.out.println( " -p : picky, fails if node name not found in mapping table" ); + System.out.println( " -" + TREE_NAME_OPTION + "=: name for the phylogeny" ); + System.out.println( " -" + TREE_ID_OPTION + "=: identifier for the phylogeny (in the form provider:value)" ); + System.out.println( " -" + TREE_DESC_OPTION + "=: description for phylogenies" ); + System.out.println(); + System.out.println(); + System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" ); + System.out.println(); + System.out.println( " -f= : field to be replaced: " + NODE_NAME_FIELD + " : node name" ); + System.out.println( " " + SEQUENCE_ANNOTATION_DESC + + " : sequence annotation description" ); + System.out.println( " " + DS_FILED + " : domain structure" ); + System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" ); + System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD + + ": taxonomy scientific name" ); + System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" ); + System.out.println( " -k= : key column in mapping table (0 based)," ); + System.out.println( " names of the node to be decorated - default is 0" ); + System.out.println( " -v= : value column in mapping table (0 based)," ); + System.out.println( " data which with to decorate - default is 1" ); + System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION + + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" ); + System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION + + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" ); + System.out.println( " -s= : column separator in mapping file, default is \"" + + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" ); + System.out.println( " -x : process name \"intelligently\" (only for -f=n)" ); + System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION + + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" ); + System.out.println( " -c : cut name after first space (only for -f=n)" ); + System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION + + " : trim node name to be replaced after tilde" ); + System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + " : to midpoint-root the tree" ); + System.out.println( " -" + decorator.ORDER_TREE_OPTION + " : to order tree branches" ); + System.out.println(); + System.exit( -1 ); + } }