in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 12 Oct 2012 02:36:51 +0000 (02:36 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 12 Oct 2012 02:36:51 +0000 (02:36 +0000)
forester/java/src/org/forester/application/decorator.java
forester/java/src/org/forester/tools/PhylogenyDecorator.java

index 88296e9..f1b2fbc 100644 (file)
@@ -47,32 +47,32 @@ import org.forester.util.ForesterUtil;
 
 public final class decorator {
 
-    private static final String SEQUENCE_NAME_FIELD                    = "s";
-    private static final String TAXONOMY_CODE_FIELD                    = "c";
-    private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD         = "sn";
-    private static final String DS_FILED                               = "d";
-    private static final String SEQUENCE_ANNOTATION_DESC               = "a";
-    private static final String NODE_NAME_FIELD                        = "n";
-    final static private String PICKY_OPTION                           = "p";
-    final static private String FIELD_OPTION                           = "f";
-    final static private String TRIM_AFTER_TILDE_OPTION                = "t";
-    final static private String MOVE_DOMAIN_NUMBER_OPTION              = "mdn";       // Hidden expert option.
-    final static private String TREE_NAME_OPTION                       = "pn";
-    final static private String TREE_ID_OPTION                         = "pi";
-    final static private String TREE_DESC_OPTION                       = "pd";
-    final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION = "sn";
-    final static private String PROCESS_NAME_INTELLIGENTLY_OPTION      = "x";
-    final static private String PROCESS_SIMILAR_TO_OPTION              = "xs";
-    final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION      = "c";
-    final static private String ALLOW_REMOVAL_OF_CHARS_OPTION          = "r";
-    final static private String ADVANCED_TABLE_OPTION                  = "table";
-    final static private String KEY_COLUMN                             = "k";
-    final static private String VALUE_COLUMN                           = "v";
-    final static private String MAPPING_FILE_SEPARATOR_OPTION          = "s";
-    final static private String MAPPING_FILE_SEPARATOR_DEFAULT         = ": ";
-    final static private String PRG_NAME                               = "decorator";
-    final static private String PRG_VERSION                            = "1.11";
-    final static private String PRG_DATE                               = "2012.09.15";
+    private static final String SEQUENCE_NAME_FIELD                     = "s";
+    private static final String TAXONOMY_CODE_FIELD                     = "c";
+    private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD          = "sn";
+    private static final String DS_FILED                                = "d";
+    private static final String SEQUENCE_ANNOTATION_DESC                = "a";
+    private static final String NODE_NAME_FIELD                         = "n";
+    final static private String PICKY_OPTION                            = "p";
+    final static private String FIELD_OPTION                            = "f";
+    final static private String TRIM_AFTER_TILDE_OPTION                 = "t";
+    final static private String TREE_NAME_OPTION                        = "pn";
+    final static private String TREE_ID_OPTION                          = "pi";
+    final static private String TREE_DESC_OPTION                        = "pd";
+    final static private String EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION  = "sn";
+    final static private String EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION = "tc";
+    final static private String PROCESS_NAME_INTELLIGENTLY_OPTION       = "x";
+    final static private String PROCESS_SIMILAR_TO_OPTION               = "xs";
+    final static private String CUT_NAME_AFTER_FIRST_SPACE_OPTION       = "c";
+    final static private String ALLOW_REMOVAL_OF_CHARS_OPTION           = "r";
+    final static private String ADVANCED_TABLE_OPTION                   = "table";
+    final static private String KEY_COLUMN                              = "k";
+    final static private String VALUE_COLUMN                            = "v";
+    final static private String MAPPING_FILE_SEPARATOR_OPTION           = "s";
+    final static private String MAPPING_FILE_SEPARATOR_DEFAULT          = ": ";
+    final static private String PRG_NAME                                = "decorator";
+    final static private String PRG_VERSION                             = "1.11";
+    final static private String PRG_DATE                                = "2012.09.15";
 
     private static void argumentsError() {
         System.out.println();
@@ -105,7 +105,9 @@ public final class decorator {
         System.out.println( " -v=<n> : value column in mapping table (0 based)," );
         System.out.println( "          data which with to decorate - default is 1" );
         System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
-                + "    : to extract bracketed scientific names" );
+                + "    : to extract bracketed scientific names, e.g. [Nematostella vectensis]" );
+        System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION
+                + "    : to extract bracketed taxonomic codes, e.g. [NEMVE]" );
         System.out.println( " -s=<c> : column separator in mapping file, default is \""
                 + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
         System.out.println( " -x     : process name \"intelligently\" (only for -f=n)" );
@@ -154,7 +156,6 @@ public final class decorator {
         allowed_options.add( decorator.TREE_NAME_OPTION );
         allowed_options.add( decorator.TREE_ID_OPTION );
         allowed_options.add( decorator.TREE_DESC_OPTION );
-        allowed_options.add( decorator.MOVE_DOMAIN_NUMBER_OPTION );
         allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION );
         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
         if ( dissallowed_options.length() > 0 ) {
@@ -186,7 +187,7 @@ public final class decorator {
         boolean process_name_intelligently = false;
         boolean process_similar_to = false;
         boolean extract_bracketed_scientific_name = false;
-        boolean move_domain_numbers_at_end_to_middle = false;
+        boolean extract_bracketed_tax_code = false;
         boolean trim_after_tilde = false;
         String tree_name = "";
         String tree_id = "";
@@ -207,6 +208,12 @@ public final class decorator {
                 }
                 extract_bracketed_scientific_name = true;
             }
+            if ( cla.isOptionSet( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION ) ) {
+                if ( advanced_table ) {
+                    argumentsError();
+                }
+                extract_bracketed_tax_code = true;
+            }
             if ( cla.isOptionSet( decorator.KEY_COLUMN ) ) {
                 if ( advanced_table ) {
                     argumentsError();
@@ -247,9 +254,6 @@ public final class decorator {
                 numbers_of_chars_allowed_to_remove_if_not_found_in_map = cla
                         .getOptionValueAsInt( decorator.ALLOW_REMOVAL_OF_CHARS_OPTION );
             }
-            if ( cla.isOptionSet( decorator.MOVE_DOMAIN_NUMBER_OPTION ) ) {
-                move_domain_numbers_at_end_to_middle = true;
-            }
             if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) {
                 field_str = cla.getOptionValue( decorator.FIELD_OPTION );
                 if ( field_str.equals( NODE_NAME_FIELD ) ) {
@@ -261,6 +265,7 @@ public final class decorator {
                 else if ( field_str.equals( DS_FILED ) ) {
                     field = FIELD.DOMAIN_STRUCTURE;
                     extract_bracketed_scientific_name = false;
+                    extract_bracketed_tax_code = false;
                 }
                 else if ( field_str.equals( TAXONOMY_CODE_FIELD ) ) {
                     field = FIELD.TAXONOMY_CODE;
@@ -271,6 +276,7 @@ public final class decorator {
                 else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) {
                     field = FIELD.TAXONOMY_SCIENTIFIC_NAME;
                     extract_bracketed_scientific_name = false;
+                    extract_bracketed_tax_code = false;
                 }
                 else {
                     ForesterUtil.fatalError( decorator.PRG_NAME, "unknown value for \"" + decorator.FIELD_OPTION
@@ -299,6 +305,9 @@ public final class decorator {
             ForesterUtil.fatalError( decorator.PRG_NAME, "attempt to use -" + decorator.PROCESS_SIMILAR_TO_OPTION
                     + " and -c option together" );
         }
+        if ( extract_bracketed_scientific_name && extract_bracketed_tax_code ) {
+            argumentsError();
+        }
         Phylogeny[] phylogenies = null;
         try {
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
@@ -367,12 +376,12 @@ public final class decorator {
                                              map,
                                              field,
                                              extract_bracketed_scientific_name,
+                                             extract_bracketed_tax_code,
                                              picky,
                                              cut_name_after_space,
                                              process_name_intelligently,
                                              process_similar_to,
                                              numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                             move_domain_numbers_at_end_to_middle,
                                              trim_after_tilde );
             }
         }
index 3191201..cf3d280 100644 (file)
@@ -29,7 +29,6 @@ import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.forester.archaeopteryx.AptxUtil;
@@ -183,25 +182,25 @@ public final class PhylogenyDecorator {
                                  final Map<String, String> map,
                                  final FIELD field,
                                  final boolean extract_bracketed_scientific_name,
+                                 final boolean extract_bracketed_tax_code,
                                  final boolean picky,
                                  final boolean cut_name_after_space,
                                  final boolean process_name_intelligently,
                                  final boolean process_similar_to,
                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                 final boolean move_domain_numbers_at_end_to_middle,
                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
             PhyloXmlDataFormatException {
         PhylogenyDecorator.decorate( phylogeny,
                                      map,
                                      field,
                                      extract_bracketed_scientific_name,
+                                     extract_bracketed_tax_code,
                                      picky,
                                      null,
                                      cut_name_after_space,
                                      process_name_intelligently,
                                      process_similar_to,
                                      numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                     move_domain_numbers_at_end_to_middle,
                                      trim_after_tilde );
     }
 
@@ -224,13 +223,13 @@ public final class PhylogenyDecorator {
                                  final Map<String, String> map,
                                  final FIELD field,
                                  final boolean extract_bracketed_scientific_name,
+                                 final boolean extract_bracketed_tax_code,
                                  final boolean picky,
                                  final Map<String, String> intermediate_map,
                                  final boolean cut_name_after_space,
                                  final boolean process_name_intelligently,
                                  final boolean process_similar_to,
                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                 final boolean move_domain_numbers_at_end_to_middle,
                                  final boolean trim_after_tilde ) throws IllegalArgumentException,
             PhyloXmlDataFormatException {
         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
@@ -278,6 +277,9 @@ public final class PhylogenyDecorator {
                         if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
                             new_value = extractBracketedScientificNames( node, new_value );
                         }
+                        else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) {
+                            new_value = extractBracketedTaxCodes( node, new_value );
+                        }
                         switch ( field ) {
                             case SEQUENCE_ANNOTATION_DESC:
                                 if ( PhylogenyDecorator.VERBOSE ) {
@@ -356,9 +358,6 @@ public final class PhylogenyDecorator {
                             default:
                                 throw new RuntimeException( "unknown field \"" + field + "\"" );
                         }
-                        if ( move_domain_numbers_at_end_to_middle && ( field != FIELD.NODE_NAME ) ) {
-                            node.setName( moveDomainNumbersAtEnd( node.getName() ) );
-                        }
                     }
                 }
                 else if ( picky ) {
@@ -385,12 +384,12 @@ public final class PhylogenyDecorator {
                                  final Map<String, String> map,
                                  final FIELD field,
                                  final boolean extract_bracketed_scientific_name,
+                                 final boolean extract_bracketed_tax_code,
                                  final boolean picky,
                                  final boolean cut_name_after_space,
                                  final boolean process_name_intelligently,
                                  final boolean process_similar_to,
                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                 final boolean move_domain_numbers_at_end_to_middle,
                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
             PhyloXmlDataFormatException {
         for( int i = 0; i < phylogenies.length; ++i ) {
@@ -398,12 +397,12 @@ public final class PhylogenyDecorator {
                                          map,
                                          field,
                                          extract_bracketed_scientific_name,
+                                         extract_bracketed_tax_code,
                                          picky,
                                          cut_name_after_space,
                                          process_name_intelligently,
                                          process_similar_to,
                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                         move_domain_numbers_at_end_to_middle,
                                          trim_after_tilde );
         }
     }
@@ -412,13 +411,13 @@ public final class PhylogenyDecorator {
                                  final Map<String, String> map,
                                  final FIELD field,
                                  final boolean extract_bracketed_scientific_name,
+                                 final boolean extract_bracketed_tax_code,
                                  final boolean picky,
                                  final Map<String, String> intermediate_map,
                                  final boolean cut_name_after_space,
                                  final boolean process_name_intelligently,
                                  final boolean process_similar_to,
                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                 final boolean move_domain_numbers_at_end_to_middle,
                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
             PhyloXmlDataFormatException {
         for( int i = 0; i < phylogenies.length; ++i ) {
@@ -426,13 +425,13 @@ public final class PhylogenyDecorator {
                                          map,
                                          field,
                                          extract_bracketed_scientific_name,
+                                         extract_bracketed_tax_code,
                                          picky,
                                          intermediate_map,
                                          cut_name_after_space,
                                          process_name_intelligently,
                                          process_similar_to,
                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                         move_domain_numbers_at_end_to_middle,
                                          trim_after_tilde );
         }
     }
@@ -453,6 +452,19 @@ public final class PhylogenyDecorator {
         return new_value.substring( 0, i - 1 ).trim();
     }
 
+    /**
+     * Removes a trailing bracketed taxonomy code (e.g. "[NEMVE]") from a value and
+     * stores that code in the taxonomy of the given node.
+     * <p>
+     * The text between the last "[" and the final character becomes the taxonomy
+     * code; the text before the last "[" (trimmed) is returned.
+     *
+     * @param node      the node whose taxonomy receives the extracted code; a taxonomy
+     *                  is created via AptxUtil.ensurePresenceOfTaxonomy before the code is set
+     * @param new_value the value to process; the caller only invokes this for values
+     *                  ending in "]"
+     * @return new_value without the bracketed suffix, trimmed; new_value unchanged
+     *         if it contains no "["
+     * @throws IllegalArgumentException if the extracted code is rejected by
+     *                                  setTaxonomyCode
+     */
+    private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
+        final int open = new_value.lastIndexOf( "[" );
+        if ( open < 0 ) {
+            // Only the closing "]" is guaranteed by the caller; without an opening
+            // bracket there is nothing to extract, so leave value and node untouched
+            // instead of failing with a StringIndexOutOfBoundsException below.
+            return new_value;
+        }
+        final String tc = new_value.substring( open + 1, new_value.length() - 1 );
+        AptxUtil.ensurePresenceOfTaxonomy( node );
+        try {
+            node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
+        }
+        catch ( final PhyloXmlDataFormatException e ) {
+            // Keep the original exception as the cause so the reason is not lost.
+            throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc, e );
+        }
+        // Cut at "open" rather than "open - 1": trim() already removes a separating
+        // space, and this neither drops a name character when no space precedes "["
+        // nor throws when the value starts with "[".
+        return new_value.substring( 0, open ).trim();
+    }
+
     private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
         String new_name = null;
         if ( PhylogenyDecorator.VERBOSE ) {
@@ -473,19 +485,6 @@ public final class PhylogenyDecorator {
         return new_name;
     }
 
-    private static String moveDomainNumbersAtEnd( final String node_name ) {
-        final Matcher m = NODENAME_SEQNUMBER_TAXDOMAINNUMBER.matcher( node_name );
-        if ( m.matches() ) {
-            final String seq_number = m.group( 1 );
-            final String tax = m.group( 2 );
-            final String domain_number = m.group( 3 );
-            return seq_number + "_[" + domain_number + "]_" + tax;
-        }
-        else {
-            return node_name;
-        }
-    }
-
     public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
             throws IOException {
         final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();