X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fdecorator.java;h=54458a3dcbb00f7c7c1181885721f24b6625b09c;hb=9ccba62f1e15f16ec171ca1db42386e1ad137f06;hp=c318c760540c179bf78b161a5da98e97dabcd7ee;hpb=df1d96a02980b8f3f1c5a73ace65ec50b2e98e9b;p=jalview.git diff --git a/forester/java/src/org/forester/application/decorator.java b/forester/java/src/org/forester/application/decorator.java index c318c76..54458a3 100644 --- a/forester/java/src/org/forester/application/decorator.java +++ b/forester/java/src/org/forester/application/decorator.java @@ -26,13 +26,16 @@ package org.forester.application; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; @@ -42,6 +45,7 @@ import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.sequence.Sequence; import org.forester.tools.PhylogenyDecorator; import org.forester.tools.PhylogenyDecorator.FIELD; import org.forester.util.BasicTable; @@ -52,6 +56,7 @@ import org.forester.util.ForesterUtil; public final class decorator { private static final String SEQUENCE_NAME_FIELD = "s"; + private static final String MOL_SEQ = "m"; private static final String TAXONOMY_CODE_FIELD = "c"; private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn"; private static final String DS_FILED = "d"; @@ -60,6 +65,7 @@ public final class decorator { final static private String PICKY_OPTION = "p"; final static private String FIELD_OPTION = "f"; final static private String TRIM_AFTER_TILDE_OPTION = "t"; + final static private String VERBOSE_OPTION = "ve"; final static private String TREE_NAME_OPTION = "pn"; final static private String TREE_ID_OPTION = "pi"; final static private String TREE_DESC_OPTION = "pd"; @@ -67,22 +73,20 @@ public final class decorator { final static private String ORDER_TREE_OPTION = "or"; final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn"; final static private String EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION = "tc"; - final static private String PROCESS_NAME_INTELLIGENTLY_OPTION = "x"; - final static private String PROCESS_SIMILAR_TO_OPTION = "xs"; final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION = "c"; - final static private String ALLOW_REMOVAL_OF_CHARS_OPTION = "r"; final static private String ADVANCED_TABLE_OPTION = "table"; final static private String KEY_COLUMN = "k"; final static private String VALUE_COLUMN = "v"; final static private String MAPPING_FILE_SEPARATOR_OPTION = "s"; final static private char MAPPING_FILE_SEPARATOR_DEFAULT = '\t'; final static private String PRG_NAME = "decorator"; - final static private String PRG_VERSION = "1.14"; - final static private String PRG_DATE = "130426"; + final static private String PRG_VERSION = "1.16"; + final static private String PRG_DATE = "131113"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE ); - if ( ( args.length < 4 ) || ( args.length > 12 ) ) { + System.out.println(); + if ( ( args.length < 4 ) || ( args.length > 13 ) ) { decorator.argumentsError(); } CommandLineArguments cla = null; @@ -101,14 +105,19 @@ public final class decorator { if ( phylogenies_outfile.exists() ) { ForesterUtil.fatalError( PRG_NAME, "[" + phylogenies_outfile + "] already exists" ); } + String err = ForesterUtil.isReadableFile( phylogenies_infile ); + if ( !ForesterUtil.isEmpty( err ) ) { + ForesterUtil.fatalError( PRG_NAME, err ); + } + err = ForesterUtil.isReadableFile( mapping_infile ); + if ( !ForesterUtil.isEmpty( err ) ) { + ForesterUtil.fatalError( PRG_NAME, err ); + } final List allowed_options = new ArrayList(); allowed_options.add( decorator.ADVANCED_TABLE_OPTION ); allowed_options.add( decorator.PICKY_OPTION ); allowed_options.add( decorator.FIELD_OPTION ); - allowed_options.add( decorator.PROCESS_NAME_INTELLIGENTLY_OPTION ); - allowed_options.add( decorator.PROCESS_SIMILAR_TO_OPTION ); allowed_options.add( decorator.CUT_NAME_AFTER_FIRST_SPACE_OPTION ); - allowed_options.add( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION ); allowed_options.add( decorator.KEY_COLUMN ); allowed_options.add( decorator.VALUE_COLUMN ); allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION ); @@ -120,6 +129,7 @@ public final class decorator { allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION ); allowed_options.add( decorator.ORDER_TREE_OPTION ); allowed_options.add( decorator.MIDPOINT_ROOT_OPTION ); + allowed_options.add( decorator.VERBOSE_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options ); @@ -145,15 +155,13 @@ public final class decorator { int value_column = 1; String field_str = ""; FIELD field = FIELD.NODE_NAME; - int numbers_of_chars_allowed_to_remove_if_not_found_in_map = -1; boolean cut_name_after_space = false; - boolean process_name_intelligently = false; - boolean process_similar_to = false; boolean extract_bracketed_scientific_name = false; boolean extract_bracketed_tax_code = false; boolean trim_after_tilde = false; boolean order_tree = false; boolean midpoint_root = false; + boolean verbose = false; String tree_name = ""; String tree_id = ""; String tree_desc = ""; @@ -197,34 +205,21 @@ public final class decorator { } cut_name_after_space = true; } - if ( cla.isOptionSet( decorator.PROCESS_NAME_INTELLIGENTLY_OPTION ) ) { - if ( advanced_table ) { - argumentsError(); - } - process_name_intelligently = true; - } - if ( cla.isOptionSet( decorator.PROCESS_SIMILAR_TO_OPTION ) ) { - if ( advanced_table ) { - argumentsError(); - } - process_similar_to = true; - } if ( cla.isOptionSet( decorator.TRIM_AFTER_TILDE_OPTION ) ) { if ( advanced_table ) { argumentsError(); } trim_after_tilde = true; } - if ( cla.isOptionSet( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION ) ) { - numbers_of_chars_allowed_to_remove_if_not_found_in_map = cla - .getOptionValueAsInt( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION ); - } if ( cla.isOptionSet( decorator.MIDPOINT_ROOT_OPTION ) ) { midpoint_root = true; } if ( cla.isOptionSet( decorator.ORDER_TREE_OPTION ) ) { order_tree = true; } + if ( cla.isOptionSet( decorator.VERBOSE_OPTION ) ) { + verbose = true; + } if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) { field_str = cla.getOptionValue( decorator.FIELD_OPTION ); if ( field_str.equals( NODE_NAME_FIELD ) ) { @@ -244,6 +239,9 @@ public final class decorator { else if ( field_str.equals( SEQUENCE_NAME_FIELD ) ) { field = FIELD.SEQUENCE_NAME; } + else if ( field_str.equals( MOL_SEQ ) ) { + field = FIELD.MOL_SEQ; + } else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) { field = FIELD.TAXONOMY_SCIENTIFIC_NAME; extract_bracketed_scientific_name = false; @@ -258,27 +256,13 @@ public final class decorator { catch ( final Exception e ) { ForesterUtil.fatalError( decorator.PRG_NAME, "error in command line: " + e.getMessage() ); } - if ( ( field != FIELD.NODE_NAME ) && ( cut_name_after_space || process_name_intelligently ) ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -x or -c option without -f=n" ); - } - if ( ( field != FIELD.NODE_NAME ) && process_similar_to ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION - + " option without -f=n" ); - } - if ( cut_name_after_space && process_name_intelligently ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -x and -c option together" ); - } - if ( process_similar_to && process_name_intelligently ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION - + " and -x option together" ); - } - if ( process_similar_to && cut_name_after_space ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION - + " and -c option together" ); - } if ( extract_bracketed_scientific_name && extract_bracketed_tax_code ) { argumentsError(); } + ForesterUtil.programMessage( PRG_NAME, "input tree(s) : " + phylogenies_infile ); + ForesterUtil.programMessage( PRG_NAME, "map : " + mapping_infile ); + ForesterUtil.programMessage( PRG_NAME, "output tree(s): " + phylogenies_outfile ); + System.out.println(); Phylogeny[] phylogenies = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); @@ -291,34 +275,51 @@ public final class decorator { } Map map = null; if ( !advanced_table ) { - BasicTable mapping_table = null; - try { - mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false ); - } - catch ( final Exception e ) { - ForesterUtil.fatalError( decorator.PRG_NAME, - "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" ); - } - if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" ); - } - if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" ); - } - if ( mapping_table.isEmpty() || ( mapping_table.getNumberOfColumns() < 1 ) ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table is empty" ); - } - if ( mapping_table.getNumberOfColumns() == 1 ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table has only one column" ); + if ( field != FIELD.MOL_SEQ ) { + BasicTable mapping_table = null; + try { + mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false ); + } + catch ( final Exception e ) { + ForesterUtil.fatalError( decorator.PRG_NAME, + "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" ); + } + if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" ); + } + if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" ); + } + if ( mapping_table.isEmpty() || ( mapping_table.getNumberOfColumns() < 1 ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table is empty" ); + } + if ( mapping_table.getNumberOfColumns() == 1 ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table has only one column" ); + } + map = mapping_table.getColumnsAsMap( key_column, value_column ); + final Iterator> iter = map.entrySet().iterator(); + if ( verbose ) { + System.out.println(); + } + while ( iter.hasNext() ) { + final Entry e = iter.next(); + if ( ForesterUtil.isEmpty( e.getKey() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table contains empty key" ); + } + if ( ForesterUtil.isEmpty( e.getValue() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table contains empty value" ); + } + if ( verbose ) { + System.out.println( e.getKey() + " => " + e.getValue() ); + } + } + if ( verbose ) { + System.out.println(); + } } - map = mapping_table.getColumnsAsMap( key_column, value_column ); - final Iterator> iter = map.entrySet().iterator(); - System.out.println(); - while ( iter.hasNext() ) { - final Entry e = iter.next(); - System.out.println( e.getKey() + " => " + e.getValue() ); + else { + map = readFastaFileIntoMap( mapping_infile, verbose ); } - System.out.println(); } if ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id ) || !ForesterUtil.isEmpty( tree_desc ) ) { @@ -350,23 +351,23 @@ public final class decorator { ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read \"" + mapping_infile + "\" [" + e.getMessage() + "]" ); } - PhylogenyDecorator.decorate( phylogenies, - table, - picky, - numbers_of_chars_allowed_to_remove_if_not_found_in_map ); + for( final Phylogeny phylogenie : phylogenies ) { + PhylogenyDecorator.decorate( phylogenie, table, picky ); + } } else { - PhylogenyDecorator.decorate( phylogenies, - map, - field, - extract_bracketed_scientific_name, - extract_bracketed_tax_code, - picky, - cut_name_after_space, - process_name_intelligently, - process_similar_to, - numbers_of_chars_allowed_to_remove_if_not_found_in_map, - trim_after_tilde ); + for( final Phylogeny phylogenie : phylogenies ) { + final String msg = PhylogenyDecorator.decorate( phylogenie, + map, + field, + extract_bracketed_scientific_name, + extract_bracketed_tax_code, + picky, + cut_name_after_space, + trim_after_tilde, + verbose ); + ForesterUtil.programMessage( PRG_NAME, msg ); + } } } catch ( final NullPointerException e ) { @@ -397,16 +398,48 @@ public final class decorator { ForesterUtil.programMessage( PRG_NAME, "OK." ); } + private static Map readFastaFileIntoMap( final File mapping_infile, final boolean verbose ) { + List seqs = null; + try { + seqs = FastaParser.parse( new FileInputStream( mapping_infile ) ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read fasta-file from [" + mapping_infile + "] [" + + e.getMessage() + "]" ); + } + if ( ForesterUtil.isEmpty( seqs ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile + + "] is devoid of fasta-formatted sequences" ); + } + final Map map = new HashMap(); + for( final Sequence seq : seqs ) { + if ( ForesterUtil.isEmpty( seq.getIdentifier() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile + + "] contains sequence with empty identifier" ); + } + if ( map.containsKey( seq.getIdentifier() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "sequence identifier [" + seq.getIdentifier() + + "] is not unique" ); + } + if ( seq.getLength() < 1 ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "sequence [" + seq.getIdentifier() + "] is empty" ); + } + map.put( seq.getIdentifier(), seq.getMolecularSequenceAsString() ); + if ( verbose ) { + System.out.println( seq.getIdentifier() + " => " + seq.getMolecularSequenceAsString() ); + } + } + return map; + } + private static void argumentsError() { System.out.println(); System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f= " - + "[mapping table file] " ); + + " " ); System.out.println(); System.out.println( "options:" ); System.out.println(); System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=)" ); - System.out.println( " -r= : allow to remove up to n characters from the end of the names" ); - System.out.println( " in phylogenies infile if not found (in map) otherwise" ); System.out.println( " -p : picky, fails if node name not found in mapping table" ); System.out.println( " -" + TREE_NAME_OPTION + "=: name for the phylogeny" ); System.out.println( " -" + TREE_ID_OPTION + "=: identifier for the phylogeny (in the form provider:value)" ); @@ -423,6 +456,7 @@ public final class decorator { System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD + ": taxonomy scientific name" ); System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" ); + System.out.println( " " + MOL_SEQ + " : molecular sequence" ); System.out.println( " -k= : key column in mapping table (0 based)," ); System.out.println( " names of the node to be decorated - default is 0" ); System.out.println( " -v= : value column in mapping table (0 based)," ); @@ -431,16 +465,13 @@ public final class decorator { + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" ); System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" ); - System.out.println( " -s= : column separator in mapping file, default is \"" - + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" ); - System.out.println( " -x : process name \"intelligently\" (only for -f=n)" ); - System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION - + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" ); + System.out.println( " -s= : column separator in mapping file, default is tab" ); System.out.println( " -c : cut name after first space (only for -f=n)" ); System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION + " : trim node name to be replaced after tilde" ); - System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + " : to midpoint-root the tree" ); - System.out.println( " -" + decorator.ORDER_TREE_OPTION + " : to order tree branches" ); + System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + " : to midpoint-root the tree" ); + System.out.println( " -" + decorator.ORDER_TREE_OPTION + " : to order tree branches" ); + System.out.println( " -" + decorator.VERBOSE_OPTION + " : verbose" ); System.out.println(); System.exit( -1 ); }