decorator fixed
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 21 Dec 2012 20:57:20 +0000 (20:57 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 21 Dec 2012 20:57:20 +0000 (20:57 +0000)
forester/java/src/org/forester/application/decorator.java
forester/java/src/org/forester/tools/PhylogenyDecorator.java

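diff --git a/forester/java/src/org/forester/application/decorator.java b/forester/java/src/org/forester/application/decorator.java
index 18966af..8084ee2 100644
--- a/forester/java/src/org/forester/application/decorator.java
+++ b/forester/java/src/org/forester/application/decorator.java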
@@ -71,54 +71,8 @@ public final class decorator {
     final static private String MAPPING_FILE_SEPARATOR_OPTION           = "s";
     final static private String MAPPING_FILE_SEPARATOR_DEFAULT          = ": ";
     final static private String PRG_NAME                                = "decorator";
-    final static private String PRG_VERSION                             = "1.11";
-    final static private String PRG_DATE                                = "2012.09.15";
-
-    private static void argumentsError() {
-        System.out.println();
-        System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
-                + "[mapping table file] <phylogenies outfile>" );
-        System.out.println();
-        System.out.println( "options:" );
-        System.out.println();
-        System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
-        System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
-        System.out.println( "          in phylogenies infile if not found (in map) otherwise" );
-        System.out.println( " -p     : picky, fails if node name not found in mapping table" );
-        System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
-        System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
-        System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
-        System.out.println();
-        System.out.println();
-        System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
-        System.out.println();
-        System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
-        System.out.println( "                                " + SEQUENCE_ANNOTATION_DESC
-                + " : sequence annotation description" );
-        System.out.println( "                                " + DS_FILED + " : domain structure" );
-        System.out.println( "                                " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
-        System.out.println( "                                " + TAXONOMY_SCIENTIFIC_NAME_FIELD
-                + ": taxonomy scientific name" );
-        System.out.println( "                                " + SEQUENCE_NAME_FIELD + " : sequence name" );
-        System.out.println( " -k=<n> : key column in mapping table (0 based)," );
-        System.out.println( "          names of the node to be decorated - default is 0" );
-        System.out.println( " -v=<n> : value column in mapping table (0 based)," );
-        System.out.println( "          data which with to decorate - default is 1" );
-        System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
-                + "    : to extract bracketed scientific names, e.g. [Nematostella vectensis]" );
-        System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION
-                + "    : to extract bracketed taxonomic codes, e.g. [NEMVE]" );
-        System.out.println( " -s=<c> : column separator in mapping file, default is \""
-                + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
-        System.out.println( " -x     : process name \"intelligently\" (only for -f=n)" );
-        System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
-                + "    : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
-        System.out.println( " -c     : cut name after first space (only for -f=n)" );
-        System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
-                + "     : trim node name to be replaced after tilde" );
-        System.out.println();
-        System.exit( -1 );
-    }
+    final static private String PRG_VERSION                             = "1.12";
+    final static private String PRG_DATE                                = "2012.12.21";
 
     public static void main( final String args[] ) {
         ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE );
@@ -153,6 +107,7 @@ public final class decorator {
         allowed_options.add( decorator.VALUE_COLUMN );
         allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION );
         allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION );
+        allowed_options.add( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION );
         allowed_options.add( decorator.TREE_NAME_OPTION );
         allowed_options.add( decorator.TREE_ID_OPTION );
         allowed_options.add( decorator.TREE_DESC_OPTION );
@@ -402,4 +357,50 @@ public final class decorator {
         ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile );
         ForesterUtil.programMessage( PRG_NAME, "OK." );
     }
+
+    private static void argumentsError() {
+        System.out.println();
+        System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
+                + "[mapping table file] <phylogenies outfile>" );
+        System.out.println();
+        System.out.println( "options:" );
+        System.out.println();
+        System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
+        System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
+        System.out.println( "          in phylogenies infile if not found (in map) otherwise" );
+        System.out.println( " -p     : picky, fails if node name not found in mapping table" );
+        System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
+        System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
+        System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
+        System.out.println();
+        System.out.println();
+        System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
+        System.out.println();
+        System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
+        System.out.println( "                                " + SEQUENCE_ANNOTATION_DESC
+                + " : sequence annotation description" );
+        System.out.println( "                                " + DS_FILED + " : domain structure" );
+        System.out.println( "                                " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
+        System.out.println( "                                " + TAXONOMY_SCIENTIFIC_NAME_FIELD
+                + ": taxonomy scientific name" );
+        System.out.println( "                                " + SEQUENCE_NAME_FIELD + " : sequence name" );
+        System.out.println( " -k=<n> : key column in mapping table (0 based)," );
+        System.out.println( "          names of the node to be decorated - default is 0" );
+        System.out.println( " -v=<n> : value column in mapping table (0 based)," );
+        System.out.println( "          data which with to decorate - default is 1" );
+        System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
+                + "    : to extract bracketed scientific names, e.g. [Nematostella vectensis]" );
+        System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION
+                + "    : to extract bracketed taxonomic codes, e.g. [NEMVE]" );
+        System.out.println( " -s=<c> : column separator in mapping file, default is \""
+                + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
+        System.out.println( " -x     : process name \"intelligently\" (only for -f=n)" );
+        System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
+                + "    : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
+        System.out.println( " -c     : cut name after first space (only for -f=n)" );
+        System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
+                + "     : trim node name to be replaced after tilde" );
+        System.out.println();
+        System.exit( -1 );
+    }
 }
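diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java
index 7e70444..f9bcd1f 100644
--- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java
+++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java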
@@ -29,7 +29,6 @@ import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.regex.Pattern;
 
 import org.forester.io.parsers.nhx.NHXFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
@@ -48,25 +47,22 @@ import org.forester.util.ForesterUtil;
 public final class PhylogenyDecorator {
 
     // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
-    final private static String  TP_TAXONOMY_CODE                   = "TAXONOMY_CODE";
-    final private static String  TP_TAXONOMY_ID                     = "TAXONOMY_ID";
-    final private static String  TP_TAXONOMY_ID_PROVIDER            = "TAXONOMY_ID_PROVIDER";
-    final private static String  TP_TAXONOMY_SN                     = "TAXONOMY_SN";
-    final private static String  TP_TAXONOMY_CN                     = "TAXONOMY_CN";
-    final private static String  TP_TAXONOMY_SYN                    = "TAXONOMY_SYN";
-    final private static String  TP_SEQ_SYMBOL                      = "SEQ_SYMBOL";
-    final private static String  TP_SEQ_ACCESSION                   = "SEQ_ACCESSION";
-    final private static String  TP_SEQ_ACCESSION_SOURCE            = "SEQ_ACCESSION_SOURCE";
-    final private static String  TP_SEQ_ANNOTATION_DESC             = "SEQ_ANNOTATION_DESC";
-    final private static String  TP_SEQ_ANNOTATION_REF              = "SEQ_ANNOTATION_REF";
-    final private static String  TP_SEQ_MOL_SEQ                     = "SEQ_MOL_SEQ";
-    final private static String  TP_SEQ_NAME                        = "SEQ_NAME";
-    final private static String  TP_NODE_NAME                       = "NODE_NAME";
-    final private static Pattern NODENAME_SEQNUMBER_TAXDOMAINNUMBER = Pattern
-                                                                            .compile( "^([a-fA-Z0-9]{1,5})_([A-Z0-9]{2,4}[A-Z])(\\d{1,4})$" );
-    public final static boolean  SANITIZE                           = false;
-    public final static boolean  VERBOSE                            = true;
-    private static final boolean CUT                                = true;
+    final private static String TP_TAXONOMY_CODE        = "TAXONOMY_CODE";
+    final private static String TP_TAXONOMY_ID          = "TAXONOMY_ID";
+    final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
+    final private static String TP_TAXONOMY_SN          = "TAXONOMY_SN";
+    final private static String TP_TAXONOMY_CN          = "TAXONOMY_CN";
+    final private static String TP_TAXONOMY_SYN         = "TAXONOMY_SYN";
+    final private static String TP_SEQ_SYMBOL           = "SEQ_SYMBOL";
+    final private static String TP_SEQ_ACCESSION        = "SEQ_ACCESSION";
+    final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE";
+    final private static String TP_SEQ_ANNOTATION_DESC  = "SEQ_ANNOTATION_DESC";
+    final private static String TP_SEQ_ANNOTATION_REF   = "SEQ_ANNOTATION_REF";
+    final private static String TP_SEQ_MOL_SEQ          = "SEQ_MOL_SEQ";
+    final private static String TP_SEQ_NAME             = "SEQ_NAME";
+    final private static String TP_NODE_NAME            = "NODE_NAME";
+    public final static boolean SANITIZE                = false;
+    public final static boolean VERBOSE                 = true;
 
     private PhylogenyDecorator() {
         // Not needed.
@@ -162,21 +158,6 @@ public final class PhylogenyDecorator {
         }
     }
 
-    /**
-     * 
-     * 
-     * 
-     * 
-     * 
-     * @param phylogeny
-     * @param map
-     *            maps names (in phylogeny) to new values
-     * @param field
-     * @param picky
-     * @throws IllegalArgumentException
-     * @throws NHXFormatException
-     * @throws PhyloXmlDataFormatException 
-     */
     public static void decorate( final Phylogeny phylogeny,
                                  final Map<String, String> map,
                                  final FIELD field,
@@ -244,24 +225,6 @@ public final class PhylogenyDecorator {
                 if ( intermediate_map != null ) {
                     name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
                 }
-                // int space_index = name.indexOf( " " );
-                //                if ( CUT && space_index > 0 ) {
-                //                    int y = name.lastIndexOf( "|" );
-                //                    name = name.substring( y + 1, space_index );
-                //                }
-                //                String new_value = null;
-                //                for( String key : map.keySet() ) {
-                //                    if ( key.indexOf( name ) >= 0 ) {
-                //                        if ( new_value == null ) {
-                //                            new_value = map.get( key );
-                //                        }
-                //                        else {
-                //                            System.out.println( name + " is not unique" );
-                //                            System.exit( -1 );
-                //                        }
-                //                    }
-                //                }
-                // if ( new_value != null ) {
                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
                     String new_value = map.get( name );
                     int x = 0;
@@ -433,6 +396,30 @@ public final class PhylogenyDecorator {
         }
     }
 
+    public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
+            throws IOException {
+        final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
+        BasicTable<String> mapping_table = null;
+        mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false );
+        for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
+            final Map<String, String> row_map = new HashMap<String, String>();
+            String name = null;
+            for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
+                final String table_cell = mapping_table.getValue( col, row );
+                if ( col == 0 ) {
+                    name = table_cell;
+                }
+                else if ( table_cell != null ) {
+                    final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
+                    final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
+                    row_map.put( key, val );
+                }
+            }
+            map.put( name, row_map );
+        }
+        return map;
+    }
+
     private static String deleteAtFirstSpace( final String name ) {
         final int first_space = name.indexOf( " " );
         if ( first_space > 1 ) {
@@ -482,30 +469,6 @@ public final class PhylogenyDecorator {
         return new_name;
     }
 
-    public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
-            throws IOException {
-        final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
-        BasicTable<String> mapping_table = null;
-        mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false );
-        for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
-            final Map<String, String> row_map = new HashMap<String, String>();
-            String name = null;
-            for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
-                final String table_cell = mapping_table.getValue( col, row );
-                if ( col == 0 ) {
-                    name = table_cell;
-                }
-                else if ( table_cell != null ) {
-                    final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
-                    final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
-                    row_map.put( key, val );
-                }
-            }
-            map.put( name, row_map );
-        }
-        return map;
-    }
-
     private static String processNameIntelligently( final String name ) {
         final String[] s = name.split( " " );
         if ( s.length < 2 ) {