inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 13 Nov 2013 21:23:54 +0000 (21:23 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 13 Nov 2013 21:23:54 +0000 (21:23 +0000)
forester/java/src/org/forester/application/decorator.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/tools/PhylogenyDecorator.java

index c318c76..925ba9e 100644 (file)
 package org.forester.application;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
+import org.forester.io.parsers.FastaParser;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.PhylogenyWriter;
@@ -42,6 +45,7 @@ import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
 import org.forester.phylogeny.data.Identifier;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.sequence.Sequence;
 import org.forester.tools.PhylogenyDecorator;
 import org.forester.tools.PhylogenyDecorator.FIELD;
 import org.forester.util.BasicTable;
@@ -52,6 +56,7 @@ import org.forester.util.ForesterUtil;
 public final class decorator {
 
     private static final String SEQUENCE_NAME_FIELD                     = "s";
+    private static final String MOL_SEQ                                 = "m";
     private static final String TAXONOMY_CODE_FIELD                     = "c";
     private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD          = "sn";
     private static final String DS_FILED                                = "d";
@@ -60,6 +65,7 @@ public final class decorator {
     final static private String PICKY_OPTION                            = "p";
     final static private String FIELD_OPTION                            = "f";
     final static private String TRIM_AFTER_TILDE_OPTION                 = "t";
+    final static private String VERBOSE_OPTION                          = "ve";
     final static private String TREE_NAME_OPTION                        = "pn";
     final static private String TREE_ID_OPTION                          = "pi";
     final static private String TREE_DESC_OPTION                        = "pd";
@@ -77,8 +83,8 @@ public final class decorator {
     final static private String MAPPING_FILE_SEPARATOR_OPTION           = "s";
     final static private char   MAPPING_FILE_SEPARATOR_DEFAULT          = '\t';
     final static private String PRG_NAME                                = "decorator";
-    final static private String PRG_VERSION                             = "1.14";
-    final static private String PRG_DATE                                = "130426";
+    final static private String PRG_VERSION                             = "1.16";
+    final static private String PRG_DATE                                = "131113";
 
     public static void main( final String args[] ) {
         ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE );
@@ -120,6 +126,7 @@ public final class decorator {
         allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION );
         allowed_options.add( decorator.ORDER_TREE_OPTION );
         allowed_options.add( decorator.MIDPOINT_ROOT_OPTION );
+        allowed_options.add( decorator.VERBOSE_OPTION );
         final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
         if ( dissallowed_options.length() > 0 ) {
             ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options );
@@ -154,6 +161,7 @@ public final class decorator {
         boolean trim_after_tilde = false;
         boolean order_tree = false;
         boolean midpoint_root = false;
+        boolean verbose = false;
         String tree_name = "";
         String tree_id = "";
         String tree_desc = "";
@@ -225,6 +233,9 @@ public final class decorator {
             if ( cla.isOptionSet( decorator.ORDER_TREE_OPTION ) ) {
                 order_tree = true;
             }
+            if ( cla.isOptionSet( decorator.VERBOSE_OPTION ) ) {
+                verbose = true;
+            }
             if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) {
                 field_str = cla.getOptionValue( decorator.FIELD_OPTION );
                 if ( field_str.equals( NODE_NAME_FIELD ) ) {
@@ -244,6 +255,9 @@ public final class decorator {
                 else if ( field_str.equals( SEQUENCE_NAME_FIELD ) ) {
                     field = FIELD.SEQUENCE_NAME;
                 }
+                else if ( field_str.equals( MOL_SEQ ) ) {
+                    field = FIELD.MOL_SEQ;
+                }
                 else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) {
                     field = FIELD.TAXONOMY_SCIENTIFIC_NAME;
                     extract_bracketed_scientific_name = false;
@@ -291,34 +305,41 @@ public final class decorator {
         }
         Map<String, String> map = null;
         if ( !advanced_table ) {
-            BasicTable<String> mapping_table = null;
-            try {
-                mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false );
-            }
-            catch ( final Exception e ) {
-                ForesterUtil.fatalError( decorator.PRG_NAME,
-                                         "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" );
-            }
-            if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) {
-                ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" );
-            }
-            if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) {
-                ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" );
-            }
-            if ( mapping_table.isEmpty() || ( mapping_table.getNumberOfColumns() < 1 ) ) {
-                ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table is empty" );
-            }
-            if ( mapping_table.getNumberOfColumns() == 1 ) {
-                ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table has only one column" );
+            if ( field != FIELD.MOL_SEQ ) {
+                BasicTable<String> mapping_table = null;
+                try {
+                    mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false );
+                }
+                catch ( final Exception e ) {
+                    ForesterUtil.fatalError( decorator.PRG_NAME,
+                                             "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" );
+                }
+                if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) {
+                    ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" );
+                }
+                if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) {
+                    ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" );
+                }
+                if ( mapping_table.isEmpty() || ( mapping_table.getNumberOfColumns() < 1 ) ) {
+                    ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table is empty" );
+                }
+                if ( mapping_table.getNumberOfColumns() == 1 ) {
+                    ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table has only one column" );
+                }
+                map = mapping_table.getColumnsAsMap( key_column, value_column );
+                if ( verbose ) {
+                    final Iterator<Entry<String, String>> iter = map.entrySet().iterator();
+                    System.out.println();
+                    while ( iter.hasNext() ) {
+                        final Entry<String, String> e = iter.next();
+                        System.out.println( e.getKey() + " => " + e.getValue() );
+                    }
+                    System.out.println();
+                }
             }
-            map = mapping_table.getColumnsAsMap( key_column, value_column );
-            final Iterator<Entry<String, String>> iter = map.entrySet().iterator();
-            System.out.println();
-            while ( iter.hasNext() ) {
-                final Entry<String, String> e = iter.next();
-                System.out.println( e.getKey() + " => " + e.getValue() );
+            else {
+                map = readFastaFileIntoMap( mapping_infile, verbose );
             }
-            System.out.println();
         }
         if ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id )
                 || !ForesterUtil.isEmpty( tree_desc ) ) {
@@ -366,7 +387,8 @@ public final class decorator {
                                              process_name_intelligently,
                                              process_similar_to,
                                              numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                             trim_after_tilde );
+                                             trim_after_tilde,
+                                             verbose );
             }
         }
         catch ( final NullPointerException e ) {
@@ -397,10 +419,44 @@ public final class decorator {
         ForesterUtil.programMessage( PRG_NAME, "OK." );
     }
 
+    private static Map<String, String> readFastaFileIntoMap( final File mapping_infile, final boolean verbose ) {
+        List<Sequence> seqs = null;
+        try {
+            seqs = FastaParser.parse( new FileInputStream( mapping_infile ) );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read fasta-file from [" + mapping_infile + "] ["
+                    + e.getMessage() + "]" );
+        }
+        if ( ForesterUtil.isEmpty( seqs ) ) {
+            ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile
+                    + "] is devoid of fasta-formatted sequences" );
+        }
+        final Map<String, String> map = new HashMap<String, String>();
+        for( final Sequence seq : seqs ) {
+            if ( ForesterUtil.isEmpty( seq.getIdentifier() ) ) {
+                ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile
+                        + "] contains sequence with empty identifier" );
+            }
+            if ( map.containsKey( seq.getIdentifier() ) ) {
+                ForesterUtil.fatalError( decorator.PRG_NAME, "sequence identifier [" + seq.getIdentifier()
+                        + "] is not unique" );
+            }
+            if ( seq.getLength() < 1 ) {
+                ForesterUtil.fatalError( decorator.PRG_NAME, "sequence [" + seq.getIdentifier() + "] is empty" );
+            }
+            map.put( seq.getIdentifier(), seq.getMolecularSequenceAsString() );
+            if ( verbose ) {
+                System.out.println( seq.getIdentifier() + " => " + seq.getMolecularSequenceAsString() );
+            }
+        }
+        return map;
+    }
+
     private static void argumentsError() {
         System.out.println();
         System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
-                + "[mapping table file] <phylogenies outfile>" );
+                + "[mapping table file|fasta-file] <phylogenies outfile>" );
         System.out.println();
         System.out.println( "options:" );
         System.out.println();
@@ -423,6 +479,7 @@ public final class decorator {
         System.out.println( "                                " + TAXONOMY_SCIENTIFIC_NAME_FIELD
                 + ": taxonomy scientific name" );
         System.out.println( "                                " + SEQUENCE_NAME_FIELD + " : sequence name" );
+        System.out.println( "                                " + MOL_SEQ + " : molecular sequence" );
         System.out.println( " -k=<n> : key column in mapping table (0 based)," );
         System.out.println( "          names of the node to be decorated - default is 0" );
         System.out.println( " -v=<n> : value column in mapping table (0 based)," );
@@ -439,8 +496,9 @@ public final class decorator {
         System.out.println( " -c     : cut name after first space (only for -f=n)" );
         System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
                 + "     : trim node name to be replaced after tilde" );
-        System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + "     : to midpoint-root the tree" );
-        System.out.println( " -" + decorator.ORDER_TREE_OPTION + "     : to order tree branches" );
+        System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + "    : to midpoint-root the tree" );
+        System.out.println( " -" + decorator.ORDER_TREE_OPTION + "    : to order tree branches" );
+        System.out.println( " -" + decorator.VERBOSE_OPTION + "    : verbose" );
         System.out.println();
         System.exit( -1 );
     }
index faa0918..7931248 100644 (file)
@@ -63,9 +63,7 @@ public final class ParserUtils {
     final public static Pattern  TAXOMONY_CODE_PATTERN_PFR       = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
                                                                          + TAX_CODE + ")\\b" );
     final public static Pattern  TAXOMONY_CODE_PATTERN_A         = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")\\b" );
-    final public static Pattern  TAXOMONY_CODE_PATTERN_4         = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
-    final public static Pattern  TAXOMONY_CODE_PATTERN_6         = Pattern
-                                                                         .compile( "\\[([A-Z9][A-Z]{2}[A-Z0-9]{3})\\]" );
+    final public static Pattern  TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
     final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A   = Pattern.compile( "(?:\\b|_)(\\d{1,7})\\b" );
     final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern
                                                                          .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" );
index 57ef9f0..27a5b03 100644 (file)
@@ -48,23 +48,22 @@ import org.forester.util.ForesterUtil;
 
 public final class PhylogenyDecorator {
 
-    // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
-    final private static String TP_TAXONOMY_CODE        = "TAXONOMY_CODE";
-    final private static String TP_TAXONOMY_ID          = "TAXONOMY_ID";
-    final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
-    final private static String TP_TAXONOMY_SN          = "TAXONOMY_SN";
-    final private static String TP_TAXONOMY_CN          = "TAXONOMY_CN";
-    final private static String TP_TAXONOMY_SYN         = "TAXONOMY_SYN";
-    final private static String TP_SEQ_SYMBOL           = "SEQ_SYMBOL";
+    public final static boolean SANITIZE                = false;
+    final private static String TP_NODE_NAME            = "NODE_NAME";
     final private static String TP_SEQ_ACCESSION        = "SEQ_ACCESSION";
     final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE";
     final private static String TP_SEQ_ANNOTATION_DESC  = "SEQ_ANNOTATION_DESC";
     final private static String TP_SEQ_ANNOTATION_REF   = "SEQ_ANNOTATION_REF";
     final private static String TP_SEQ_MOL_SEQ          = "SEQ_MOL_SEQ";
     final private static String TP_SEQ_NAME             = "SEQ_NAME";
-    final private static String TP_NODE_NAME            = "NODE_NAME";
-    public final static boolean SANITIZE                = false;
-    public final static boolean VERBOSE                 = true;
+    final private static String TP_SEQ_SYMBOL           = "SEQ_SYMBOL";
+    final private static String TP_TAXONOMY_CN          = "TAXONOMY_CN";
+    // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
+    final private static String TP_TAXONOMY_CODE        = "TAXONOMY_CODE";
+    final private static String TP_TAXONOMY_ID          = "TAXONOMY_ID";
+    final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
+    final private static String TP_TAXONOMY_SN          = "TAXONOMY_SN";
+    final private static String TP_TAXONOMY_SYN         = "TAXONOMY_SYN";
 
     private PhylogenyDecorator() {
         // Not needed.
@@ -170,7 +169,8 @@ public final class PhylogenyDecorator {
                                  final boolean process_name_intelligently,
                                  final boolean process_similar_to,
                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                 final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
+                                 final boolean trim_after_tilde,
+                                 final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
             PhyloXmlDataFormatException {
         PhylogenyDecorator.decorate( phylogeny,
                                      map,
@@ -183,7 +183,8 @@ public final class PhylogenyDecorator {
                                      process_name_intelligently,
                                      process_similar_to,
                                      numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                     trim_after_tilde );
+                                     trim_after_tilde,
+                                     verbose );
     }
 
     /**
@@ -212,8 +213,8 @@ public final class PhylogenyDecorator {
                                  final boolean process_name_intelligently,
                                  final boolean process_similar_to,
                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                 final boolean trim_after_tilde ) throws IllegalArgumentException,
-            PhyloXmlDataFormatException {
+                                 final boolean trim_after_tilde,
+                                 final boolean verbose ) throws IllegalArgumentException, PhyloXmlDataFormatException {
         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
             throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
         }
@@ -231,7 +232,7 @@ public final class PhylogenyDecorator {
             }
             if ( !ForesterUtil.isEmpty( name ) ) {
                 if ( intermediate_map != null ) {
-                    name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
+                    name = PhylogenyDecorator.extractIntermediate( intermediate_map, name, verbose );
                 }
                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
                     String new_value = map.get( name );
@@ -248,20 +249,26 @@ public final class PhylogenyDecorator {
                             new_value = extractBracketedScientificNames( node, new_value );
                         }
                         else if ( extract_bracketed_tax_code ) {
-                            if ( ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ).find() ) {
+                            if ( ParserUtils.TAXOMONY_CODE_PATTERN_BRACKETED.matcher( new_value ).find() ) {
                                 new_value = extractBracketedTaxCodes( node, new_value );
                             }
-                            else if ( ParserUtils.TAXOMONY_CODE_PATTERN_6.matcher( new_value ).find() ) {
-                                new_value = extractBracketedTaxCodes6( node, new_value );
-                            }
                             else if ( picky ) {
                                 throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value
                                         + "\"" );
                             }
                         }
                         switch ( field ) {
+                            case MOL_SEQ:
+                                if ( verbose ) {
+                                    System.out.println( name + ": " + new_value );
+                                }
+                                if ( !node.getNodeData().isHasSequence() ) {
+                                    node.getNodeData().setSequence( new Sequence() );
+                                }
+                                node.getNodeData().getSequence().setMolecularSequence( new_value );
+                                break;
                             case SEQUENCE_ANNOTATION_DESC:
-                                if ( PhylogenyDecorator.VERBOSE ) {
+                                if ( verbose ) {
                                     System.out.println( name + ": " + new_value );
                                 }
                                 if ( !node.getNodeData().isHasSequence() ) {
@@ -272,7 +279,7 @@ public final class PhylogenyDecorator {
                                 node.getNodeData().getSequence().addAnnotation( annotation );
                                 break;
                             case DOMAIN_STRUCTURE:
-                                if ( PhylogenyDecorator.VERBOSE ) {
+                                if ( verbose ) {
                                     System.out.println( name + ": " + new_value );
                                 }
                                 if ( !node.getNodeData().isHasSequence() ) {
@@ -282,14 +289,14 @@ public final class PhylogenyDecorator {
                                         .setDomainArchitecture( new DomainArchitecture( new_value ) );
                                 break;
                             case TAXONOMY_CODE:
-                                if ( PhylogenyDecorator.VERBOSE ) {
+                                if ( verbose ) {
                                     System.out.println( name + ": " + new_value );
                                 }
                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
                                 node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
                                 break;
                             case TAXONOMY_SCIENTIFIC_NAME:
-                                if ( PhylogenyDecorator.VERBOSE ) {
+                                if ( verbose ) {
                                     System.out.println( name + ": " + new_value );
                                 }
                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
@@ -299,7 +306,7 @@ public final class PhylogenyDecorator {
                                 if ( trim_after_tilde ) {
                                     new_value = addTildeAnnotation( tilde_annotation, new_value );
                                 }
-                                if ( PhylogenyDecorator.VERBOSE ) {
+                                if ( verbose ) {
                                     System.out.println( name + ": " + new_value );
                                 }
                                 if ( !node.getNodeData().isHasSequence() ) {
@@ -308,23 +315,23 @@ public final class PhylogenyDecorator {
                                 node.getNodeData().getSequence().setName( new_value );
                                 break;
                             case NODE_NAME:
-                                if ( PhylogenyDecorator.VERBOSE ) {
+                                if ( verbose ) {
                                     System.out.print( name + " -> " );
                                 }
                                 if ( cut_name_after_space ) {
-                                    if ( PhylogenyDecorator.VERBOSE ) {
+                                    if ( verbose ) {
                                         System.out.print( new_value + " -> " );
                                     }
                                     new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
                                 }
                                 else if ( process_name_intelligently ) {
-                                    if ( PhylogenyDecorator.VERBOSE ) {
+                                    if ( verbose ) {
                                         System.out.print( new_value + " -> " );
                                     }
                                     new_value = PhylogenyDecorator.processNameIntelligently( new_value );
                                 }
                                 else if ( process_similar_to ) {
-                                    if ( PhylogenyDecorator.VERBOSE ) {
+                                    if ( verbose ) {
                                         System.out.print( new_value + " -> " );
                                     }
                                     new_value = PhylogenyDecorator.processSimilarTo( new_value );
@@ -335,7 +342,7 @@ public final class PhylogenyDecorator {
                                 if ( trim_after_tilde ) {
                                     new_value = addTildeAnnotation( tilde_annotation, new_value );
                                 }
-                                if ( PhylogenyDecorator.VERBOSE ) {
+                                if ( verbose ) {
                                     System.out.println( new_value );
                                 }
                                 node.setName( new_value );
@@ -352,13 +359,6 @@ public final class PhylogenyDecorator {
         }
     }
 
-    private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) {
-        if ( ForesterUtil.isEmpty( tilde_annotation ) ) {
-            return new_value;
-        }
-        return new_value + tilde_annotation;
-    }
-
     public static void decorate( final Phylogeny[] phylogenies,
                                  final Map<String, Map<String, String>> map,
                                  final boolean picky,
@@ -380,7 +380,8 @@ public final class PhylogenyDecorator {
                                  final boolean process_name_intelligently,
                                  final boolean process_similar_to,
                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                 final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
+                                 final boolean trim_after_tilde,
+                                 final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
             PhyloXmlDataFormatException {
         for( final Phylogeny phylogenie : phylogenies ) {
             PhylogenyDecorator.decorate( phylogenie,
@@ -393,7 +394,8 @@ public final class PhylogenyDecorator {
                                          process_name_intelligently,
                                          process_similar_to,
                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                         trim_after_tilde );
+                                         trim_after_tilde,
+                                         verbose );
         }
     }
 
@@ -408,7 +410,8 @@ public final class PhylogenyDecorator {
                                  final boolean process_name_intelligently,
                                  final boolean process_similar_to,
                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                 final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
+                                 final boolean trim_after_tilde,
+                                 final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
             PhyloXmlDataFormatException {
         for( final Phylogeny phylogenie : phylogenies ) {
             PhylogenyDecorator.decorate( phylogenie,
@@ -422,7 +425,8 @@ public final class PhylogenyDecorator {
                                          process_name_intelligently,
                                          process_similar_to,
                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
-                                         trim_after_tilde );
+                                         trim_after_tilde,
+                                         verbose );
         }
     }
 
@@ -450,6 +454,13 @@ public final class PhylogenyDecorator {
         return map;
     }
 
+    private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) {
+        if ( ForesterUtil.isEmpty( tilde_annotation ) ) {
+            return new_value;
+        }
+        return new_value + tilde_annotation;
+    }
+
     private static String deleteAtFirstSpace( final String name ) {
         final int first_space = name.indexOf( " " );
         if ( first_space > 1 ) {
@@ -467,48 +478,37 @@ public final class PhylogenyDecorator {
     }
 
     private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
-        final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value );
-        String tc = "?";
-        if ( m.find() ) {
-            tc = m.group( 1 );
-        }
-        ForesterUtil.ensurePresenceOfTaxonomy( node );
-        try {
-            node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
-        }
-        catch ( final PhyloXmlDataFormatException e ) {
-            throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
+        final StringBuilder sb = new StringBuilder();
+        sb.append( new_value );
+        final String tc = extractBracketedTaxCodes( sb );
+        if ( !ForesterUtil.isEmpty( tc ) ) {
+            ForesterUtil.ensurePresenceOfTaxonomy( node );
+            try {
+                node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
+            }
+            catch ( final PhyloXmlDataFormatException e ) {
+                throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
+            }
+            return sb.toString().trim();
         }
-        return new_value; //TODO //FIXME
+        return new_value;
     }
 
-    private static String extractBracketedTaxCodes6( final PhylogenyNode node, final String new_value ) {
-        final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_6.matcher( new_value );
-        String tc = "?";
+    private static String extractBracketedTaxCodes( final StringBuilder sb ) {
+        final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_BRACKETED.matcher( sb );
         if ( m.find() ) {
-            tc = m.group( 1 );
-        }
-        ForesterUtil.ensurePresenceOfTaxonomy( node );
-        try {
-            if ( tc.length() == 6 ) {
-                final String t = tc.substring( 0, 5 );
-                System.out.println( "WARNING: taxonomy code " + tc + " -> " + t );
-                tc = t;
-            }
-            else {
-                throw new IllegalArgumentException();
-            }
-            node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
-        }
-        catch ( final PhyloXmlDataFormatException e ) {
-            throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
+            final String tc = m.group( 1 );
+            sb.delete( m.start( 1 ) - 1, m.end( 1 ) + 1 );
+            return tc;
         }
-        return new_value; //TODO //FIXME
+        return null;
     }
 
-    private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
+    private static String extractIntermediate( final Map<String, String> intermediate_map,
+                                               final String name,
+                                               final boolean verbose ) {
         String new_name = null;
-        if ( PhylogenyDecorator.VERBOSE ) {
+        if ( verbose ) {
             System.out.print( name + " => " );
         }
         if ( intermediate_map.containsKey( name ) ) {
@@ -520,7 +520,7 @@ public final class PhylogenyDecorator {
         else {
             throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" );
         }
-        if ( PhylogenyDecorator.VERBOSE ) {
+        if ( verbose ) {
             System.out.println( new_name + "  " );
         }
         return new_name;
@@ -575,6 +575,12 @@ public final class PhylogenyDecorator {
     }
 
     public static enum FIELD {
-        NODE_NAME, SEQUENCE_ANNOTATION_DESC, DOMAIN_STRUCTURE, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, SEQUENCE_NAME;
+        DOMAIN_STRUCTURE,
+        MOL_SEQ,
+        NODE_NAME,
+        SEQUENCE_ANNOTATION_DESC,
+        SEQUENCE_NAME,
+        TAXONOMY_CODE,
+        TAXONOMY_SCIENTIFIC_NAME;
     }
 }