From c7c4e34d403f220262b490dd3cbe5d300f114a18 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 21 Dec 2012 20:57:20 +0000 Subject: [PATCH] decorator fixed --- .../src/org/forester/application/decorator.java | 97 ++++++++-------- .../src/org/forester/tools/PhylogenyDecorator.java | 117 +++++++------------- 2 files changed, 89 insertions(+), 125 deletions(-) diff --git a/forester/java/src/org/forester/application/decorator.java b/forester/java/src/org/forester/application/decorator.java index 18966af..8084ee2 100644 --- a/forester/java/src/org/forester/application/decorator.java +++ b/forester/java/src/org/forester/application/decorator.java @@ -71,54 +71,8 @@ public final class decorator { final static private String MAPPING_FILE_SEPARATOR_OPTION = "s"; final static private String MAPPING_FILE_SEPARATOR_DEFAULT = ": "; final static private String PRG_NAME = "decorator"; - final static private String PRG_VERSION = "1.11"; - final static private String PRG_DATE = "2012.09.15"; - - private static void argumentsError() { - System.out.println(); - System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f= " - + "[mapping table file] " ); - System.out.println(); - System.out.println( "options:" ); - System.out.println(); - System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=)" ); - System.out.println( " -r= : allow to remove up to n characters from the end of the names" ); - System.out.println( " in phylogenies infile if not found (in map) otherwise" ); - System.out.println( " -p : picky, fails if node name not found in mapping table" ); - System.out.println( " -" + TREE_NAME_OPTION + "=: name for the phylogeny" ); - System.out.println( " -" + TREE_ID_OPTION + "=: identifier for the phylogeny (in the form provider:value)" ); - System.out.println( " -" + TREE_DESC_OPTION + "=: description for phylogenies" ); - System.out.println(); - System.out.println(); - System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" ); - System.out.println(); - System.out.println( " -f= : field to be replaced: " + NODE_NAME_FIELD + " : node name" ); - System.out.println( " " + SEQUENCE_ANNOTATION_DESC - + " : sequence annotation description" ); - System.out.println( " " + DS_FILED + " : domain structure" ); - System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" ); - System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD - + ": taxonomy scientific name" ); - System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" ); - System.out.println( " -k= : key column in mapping table (0 based)," ); - System.out.println( " names of the node to be decorated - default is 0" ); - System.out.println( " -v= : value column in mapping table (0 based)," ); - System.out.println( " data which with to decorate - default is 1" ); - System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION - + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" ); - System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION - + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" ); - System.out.println( " -s= : column separator in mapping file, default is \"" - + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" ); - System.out.println( " -x : process name \"intelligently\" (only for -f=n)" ); - System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION - + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" ); - System.out.println( " -c : cut name after first space (only for -f=n)" ); - System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION - + " : trim node name to be replaced after tilde" ); - System.out.println(); - System.exit( -1 ); - } + final static private String PRG_VERSION = "1.12"; + final static private String PRG_DATE = "2012.12.21"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE ); @@ -153,6 +107,7 @@ public final class decorator { allowed_options.add( decorator.VALUE_COLUMN ); allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION ); allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION ); + allowed_options.add( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ); allowed_options.add( decorator.TREE_NAME_OPTION ); allowed_options.add( decorator.TREE_ID_OPTION ); allowed_options.add( decorator.TREE_DESC_OPTION ); @@ -402,4 +357,50 @@ public final class decorator { ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile ); ForesterUtil.programMessage( PRG_NAME, "OK." ); } + + private static void argumentsError() { + System.out.println(); + System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f= " + + "[mapping table file] " ); + System.out.println(); + System.out.println( "options:" ); + System.out.println(); + System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=)" ); + System.out.println( " -r= : allow to remove up to n characters from the end of the names" ); + System.out.println( " in phylogenies infile if not found (in map) otherwise" ); + System.out.println( " -p : picky, fails if node name not found in mapping table" ); + System.out.println( " -" + TREE_NAME_OPTION + "=: name for the phylogeny" ); + System.out.println( " -" + TREE_ID_OPTION + "=: identifier for the phylogeny (in the form provider:value)" ); + System.out.println( " -" + TREE_DESC_OPTION + "=: description for phylogenies" ); + System.out.println(); + System.out.println(); + System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" ); + System.out.println(); + System.out.println( " -f= : field to be replaced: " + NODE_NAME_FIELD + " : node name" ); + System.out.println( " " + SEQUENCE_ANNOTATION_DESC + + " : sequence annotation description" ); + System.out.println( " " + DS_FILED + " : domain structure" ); + System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" ); + System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD + + ": taxonomy scientific name" ); + System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" ); + System.out.println( " -k= : key column in mapping table (0 based)," ); + System.out.println( " names of the node to be decorated - default is 0" ); + System.out.println( " -v= : value column in mapping table (0 based)," ); + System.out.println( " data which with to decorate - default is 1" ); + System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION + + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" ); + System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION + + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" ); + System.out.println( " -s= : column separator in mapping file, default is \"" + + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" ); + System.out.println( " -x : process name \"intelligently\" (only for -f=n)" ); + System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION + + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" ); + System.out.println( " -c : cut name after first space (only for -f=n)" ); + System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION + + " : trim node name to be replaced after tilde" ); + System.out.println(); + System.exit( -1 ); + } } diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java index 7e70444..f9bcd1f 100644 --- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java +++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java @@ -29,7 +29,6 @@ import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; -import java.util.regex.Pattern; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; @@ -48,25 +47,22 @@ import org.forester.util.ForesterUtil; public final class PhylogenyDecorator { // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb: - final private static String TP_TAXONOMY_CODE = "TAXONOMY_CODE"; - final private static String TP_TAXONOMY_ID = "TAXONOMY_ID"; - final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER"; - final private static String TP_TAXONOMY_SN = "TAXONOMY_SN"; - final private static String TP_TAXONOMY_CN = "TAXONOMY_CN"; - final private static String TP_TAXONOMY_SYN = "TAXONOMY_SYN"; - final private static String TP_SEQ_SYMBOL = "SEQ_SYMBOL"; - final private static String TP_SEQ_ACCESSION = "SEQ_ACCESSION"; - final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE"; - final private static String TP_SEQ_ANNOTATION_DESC = "SEQ_ANNOTATION_DESC"; - final private static String TP_SEQ_ANNOTATION_REF = "SEQ_ANNOTATION_REF"; - final private static String TP_SEQ_MOL_SEQ = "SEQ_MOL_SEQ"; - final private static String TP_SEQ_NAME = "SEQ_NAME"; - final private static String TP_NODE_NAME = "NODE_NAME"; - final private static Pattern NODENAME_SEQNUMBER_TAXDOMAINNUMBER = Pattern - .compile( "^([a-fA-Z0-9]{1,5})_([A-Z0-9]{2,4}[A-Z])(\\d{1,4})$" ); - public final static boolean SANITIZE = false; - public final static boolean VERBOSE = true; - private static final boolean CUT = true; + final private static String TP_TAXONOMY_CODE = "TAXONOMY_CODE"; + final private static String TP_TAXONOMY_ID = "TAXONOMY_ID"; + final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER"; + final private static String TP_TAXONOMY_SN = "TAXONOMY_SN"; + final private static String TP_TAXONOMY_CN = "TAXONOMY_CN"; + final private static String TP_TAXONOMY_SYN = "TAXONOMY_SYN"; + final private static String TP_SEQ_SYMBOL = "SEQ_SYMBOL"; + final private static String TP_SEQ_ACCESSION = "SEQ_ACCESSION"; + final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE"; + final private static String TP_SEQ_ANNOTATION_DESC = "SEQ_ANNOTATION_DESC"; + final private static String TP_SEQ_ANNOTATION_REF = "SEQ_ANNOTATION_REF"; + final private static String TP_SEQ_MOL_SEQ = "SEQ_MOL_SEQ"; + final private static String TP_SEQ_NAME = "SEQ_NAME"; + final private static String TP_NODE_NAME = "NODE_NAME"; + public final static boolean SANITIZE = false; + public final static boolean VERBOSE = true; private PhylogenyDecorator() { // Not needed. @@ -162,21 +158,6 @@ public final class PhylogenyDecorator { } } - /** - * - * - * - * - * - * @param phylogeny - * @param map - * maps names (in phylogeny) to new values - * @param field - * @param picky - * @throws IllegalArgumentException - * @throws NHXFormatException - * @throws PhyloXmlDataFormatException - */ public static void decorate( final Phylogeny phylogeny, final Map map, final FIELD field, @@ -244,24 +225,6 @@ public final class PhylogenyDecorator { if ( intermediate_map != null ) { name = PhylogenyDecorator.extractIntermediate( intermediate_map, name ); } - // int space_index = name.indexOf( " " ); - // if ( CUT && space_index > 0 ) { - // int y = name.lastIndexOf( "|" ); - // name = name.substring( y + 1, space_index ); - // } - // String new_value = null; - // for( String key : map.keySet() ) { - // if ( key.indexOf( name ) >= 0 ) { - // if ( new_value == null ) { - // new_value = map.get( key ); - // } - // else { - // System.out.println( name + " is not unique" ); - // System.exit( -1 ); - // } - // } - // } - // if ( new_value != null ) { if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) { String new_value = map.get( name ); int x = 0; @@ -433,6 +396,30 @@ public final class PhylogenyDecorator { } } + public static Map> parseMappingTable( final File mapping_table_file ) + throws IOException { + final Map> map = new HashMap>(); + BasicTable mapping_table = null; + mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false ); + for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) { + final Map row_map = new HashMap(); + String name = null; + for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) { + final String table_cell = mapping_table.getValue( col, row ); + if ( col == 0 ) { + name = table_cell; + } + else if ( table_cell != null ) { + final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) ); + final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() ); + row_map.put( key, val ); + } + } + map.put( name, row_map ); + } + return map; + } + private static String deleteAtFirstSpace( final String name ) { final int first_space = name.indexOf( " " ); if ( first_space > 1 ) { @@ -482,30 +469,6 @@ public final class PhylogenyDecorator { return new_name; } - public static Map> parseMappingTable( final File mapping_table_file ) - throws IOException { - final Map> map = new HashMap>(); - BasicTable mapping_table = null; - mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false ); - for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) { - final Map row_map = new HashMap(); - String name = null; - for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) { - final String table_cell = mapping_table.getValue( col, row ); - if ( col == 0 ) { - name = table_cell; - } - else if ( table_cell != null ) { - final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) ); - final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() ); - row_map.put( key, val ); - } - } - map.put( name, row_map ); - } - return map; - } - private static String processNameIntelligently( final String name ) { final String[] s = name.split( " " ); if ( s.length < 2 ) { -- 1.7.10.2