in progress...
authorcmzmasek <chris.zma@outlook.com>
Mon, 11 Sep 2017 23:02:28 +0000 (16:02 -0700)
committercmzmasek <chris.zma@outlook.com>
Mon, 11 Sep 2017 23:02:28 +0000 (16:02 -0700)
forester/java/src/org/forester/application/cladinator.java
forester/java/src/org/forester/clade_analysis/AnalysisMulti.java
forester/java/src/org/forester/clade_analysis/ResultMulti.java
forester/java/src/org/forester/util/UserException.java [new file with mode: 0644]

index d1b4f44..d7f6e48 100644 (file)
@@ -31,7 +31,6 @@ import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.SortedMap;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
@@ -41,34 +40,39 @@ import org.forester.clade_analysis.ResultMulti;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
-import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.util.BasicTable;
 import org.forester.util.BasicTableParser;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.EasyWriter;
 import org.forester.util.ForesterUtil;
+import org.forester.util.UserException;
 
 public final class cladinator {
 
-    final static private String        PRG_NAME                 = "cladinator";
-    final static private String        PRG_VERSION              = "1.01";
-    final static private String        PRG_DATE                 = "170906";
-    final static private String        PRG_DESC                 = "clades within clades of annotated labels -- analysis of pplacer-type outputs";
-    final static private String        E_MAIL                   = "phyloxml@gmail.com";
-    final static private String        WWW                      = "https://sites.google.com/site/cmzmasek/home/software/forester";
-    final static private String        HELP_OPTION_1            = "help";
-    final static private String        HELP_OPTION_2            = "h";
-    final static private String        SEP_OPTION               = "s";
-    final static private String        QUERY_PATTERN_OPTION     = "q";
-    final static private String        SPECIFICS_CUTOFF_OPTION  = "c";
-    final static private String        MAPPING_FILE_OPTION      = "m";
-    final static private double        SPECIFICS_CUTOFF_DEFAULT = 0.8;
-    final static private String        SEP_DEFAULT              = ".";
-    final static private Pattern       QUERY_PATTERN_DEFAULT    = AnalysisMulti.DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE;
-    private final static DecimalFormat df                       = new DecimalFormat( "0.0#######" );
+    final static private String        PRG_NAME                             = "cladinator";
+    final static private String        PRG_VERSION                          = "1.02";
+    final static private String        PRG_DATE                             = "170912";
+    final static private String        PRG_DESC                             = "clades within clades of annotated labels -- analysis of pplacer-type outputs";
+    final static private String        E_MAIL                               = "phyloxml@gmail.com";
+    final static private String        WWW                                  = "https://sites.google.com/site/cmzmasek/home/software/forester";
+    final static private String        HELP_OPTION_1                        = "help";
+    final static private String        HELP_OPTION_2                        = "h";
+    final static private String        SEP_OPTION                           = "s";
+    final static private String        QUERY_PATTERN_OPTION                 = "q";
+    final static private String        SPECIFICS_CUTOFF_OPTION              = "c";
+    final static private String        MAPPING_FILE_OPTION                  = "m";
+    final static private String        EXTRA_PROCESSING_OPTION1             = "x";
+    final static private String        EXTRA_PROCESSING1_SEP_OPTION         = "xs";
+    final static private String        EXTRA_PROCESSING1_KEEP_EXTRA_OPTION  = "xk";
+    final static private String        VERBOSE_OPTION                       = "v";
+    final static private double        SPECIFICS_CUTOFF_DEFAULT             = 0.8;
+    final static private String        SEP_DEFAULT                          = ".";
+    final static private Pattern       QUERY_PATTERN_DEFAULT                = AnalysisMulti.DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE;
+    final static private String        EXTRA_PROCESSING1_SEP_DEFAULT        = "|";
+    final static private boolean       EXTRA_PROCESSING1_KEEP_EXTRA_DEFAULT = false;
+    private final static DecimalFormat df                                   = new DecimalFormat( "0.0#######" );
 
     public static void main( final String args[] ) {
         try {
@@ -100,6 +104,10 @@ public final class cladinator {
             allowed_options.add( QUERY_PATTERN_OPTION );
             allowed_options.add( SPECIFICS_CUTOFF_OPTION );
             allowed_options.add( MAPPING_FILE_OPTION );
+            allowed_options.add( EXTRA_PROCESSING_OPTION1 );
+            allowed_options.add( EXTRA_PROCESSING1_SEP_OPTION );
+            allowed_options.add( EXTRA_PROCESSING1_KEEP_EXTRA_OPTION );
+            allowed_options.add( VERBOSE_OPTION );
             final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
             if ( dissallowed_options.length() > 0 ) {
                 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
@@ -186,6 +194,47 @@ public final class cladinator {
                 t = null;
                 map = null;
             }
+            final boolean extra_processing1;
+            if ( cla.isOptionSet( EXTRA_PROCESSING_OPTION1 ) ) {
+                extra_processing1 = true;
+            }
+            else {
+                extra_processing1 = false;
+            }
+            String extra_processing1_sep = EXTRA_PROCESSING1_SEP_DEFAULT;
+            if ( cla.isOptionSet( EXTRA_PROCESSING1_SEP_OPTION ) ) {
+                if ( !extra_processing1 ) {
+                    ForesterUtil.fatalError( PRG_NAME,
+                                             "extra processing is not enabled, cannot set -"
+                                                     + EXTRA_PROCESSING1_SEP_OPTION + " option" );
+                }
+                if ( cla.isOptionValueSet( EXTRA_PROCESSING1_SEP_OPTION ) ) {
+                    extra_processing1_sep = cla.getOptionValue( EXTRA_PROCESSING1_SEP_OPTION );
+                }
+                else {
+                    ForesterUtil.fatalError( PRG_NAME, "no value for extra processing separator" );
+                }
+            }
+            // The two separators can only clash when extra processing is actually
+            // enabled; without this guard "-s=|" alone would be rejected because the
+            // default extra processing separator is "|".
+            if ( extra_processing1 && ( extra_processing1_sep != null )
+                    && extra_processing1_sep.equals( separator ) ) {
+                ForesterUtil.fatalError( PRG_NAME,
+                                         "extra processing separator must not be the same as the annotation-separator" );
+            }
+            boolean extra_processing1_keep = EXTRA_PROCESSING1_KEEP_EXTRA_DEFAULT;
+            if ( cla.isOptionSet( EXTRA_PROCESSING1_KEEP_EXTRA_OPTION ) ) {
+                if ( !extra_processing1 ) {
+                    ForesterUtil.fatalError( PRG_NAME,
+                                             "extra processing is not enabled, cannot set -"
+                                                     + EXTRA_PROCESSING1_KEEP_EXTRA_OPTION + " option" );
+                }
+                extra_processing1_keep = true;
+            }
+            final boolean verbose;
+            if ( cla.isOptionSet( VERBOSE_OPTION ) ) {
+                verbose = true;
+            }
+            else {
+                verbose = false;
+            }
             System.out.println( "Input tree                 : " + intreefile );
             System.out.println( "Specific-hit support cutoff: " + cutoff_specifics );
             if ( mapping_file != null ) {
@@ -194,6 +243,11 @@ public final class cladinator {
             }
             System.out.println( "Annotation-separator       : " + separator );
             System.out.println( "Query pattern              : " + pattern );
+            System.out.println( "Extra processing           : " + extra_processing1 );
+            if ( extra_processing1 ) {
+                System.out.println( "Extra processing separator : " + extra_processing1_sep );
+                System.out.println( "Keep extra annotations     : " + extra_processing1_keep );
+            }
             if ( outtablefile != null ) {
                 System.out.println( "Output table               : " + outtablefile );
             }
@@ -209,7 +263,15 @@ public final class cladinator {
             }
             System.out.println( "Ext. nodes in input tree   : " + p.getNumberOfExternalNodes() );
             if ( map != null ) {
-                performMapping( pattern, map, p );
+                AnalysisMulti.performMapping( pattern, map, p, verbose );
+            }
+            if ( extra_processing1 ) {
+                AnalysisMulti.performExtraProcessing1( pattern,
+                                                       p,
+                                                       extra_processing1_sep,
+                                                       extra_processing1_keep,
+                                                       separator,
+                                                       verbose );
             }
             final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, cutoff_specifics );
             printResult( res );
@@ -217,7 +279,7 @@ public final class cladinator {
                 writeResultToTable( res, outtablefile );
             }
         }
-        catch ( final IllegalArgumentException e ) {
+        catch ( final UserException e ) {
             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
         }
         catch ( final IOException e ) {
@@ -229,26 +291,6 @@ public final class cladinator {
         }
     }
 
-    private final static void performMapping( final Pattern pattern,
-                                              final SortedMap<String, String> map,
-                                              Phylogeny p ) {
-        final PhylogenyNodeIterator it = p.iteratorExternalForward();
-        while ( it.hasNext() ) {
-            final PhylogenyNode node = it.next();
-            final String name = node.getName();
-            if ( ForesterUtil.isEmpty( name ) ) {
-                ForesterUtil.fatalError( PRG_NAME, "external node with empty name found" );
-            }
-            final Matcher m = pattern.matcher( name );
-            if ( !m.find() ) {
-                if ( !map.containsKey( name ) ) {
-                    ForesterUtil.fatalError( PRG_NAME, "no mapping for \"" + name + "\" found" );
-                }
-                node.setName( map.get( name ) );
-            }
-        }
-    }
-
     private final static void printResult( final ResultMulti res ) {
         System.out.println();
         System.out.println( "Result:" );
@@ -352,21 +394,31 @@ public final class cladinator {
         System.out.println();
         System.out.println( " options:" );
         System.out.println( "  -" + SPECIFICS_CUTOFF_OPTION
-                + "=<double>: the cutoff for \"specific-hit\" support values (default: " + SPECIFICS_CUTOFF_DEFAULT
-                + ")" );
-        System.out.println( "  -" + SEP_OPTION + "=<separator>: the annotation-separator to be used (default: "
+                + "=<double>       : the cutoff for \"specific-hit\" support values (default: "
+                + SPECIFICS_CUTOFF_DEFAULT + ")" );
+        System.out.println( "  -" + SEP_OPTION + "=<separator>    : the annotation-separator to be used (default: "
                 + SEP_DEFAULT + ")" );
         System.out.println( "  -" + MAPPING_FILE_OPTION
                 + "=<mapping table>: to map node names to appropriate annotations (tab-separated, two columns) (default: no mapping)" );
         System.out.println( "  -" + QUERY_PATTERN_OPTION
                 + "=<query pattern>: the regular expression for the query (default: \"" + QUERY_PATTERN_DEFAULT
                 + "\" for pplacer output)" );
+        System.out.println( "  -" + EXTRA_PROCESSING_OPTION1
+                + "                : to enable extra processing of annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1\")" );
+        System.out.println( "  -" + EXTRA_PROCESSING1_SEP_OPTION
+                + "=<separator>   : the separator for extra annotations (default: \"" + EXTRA_PROCESSING1_SEP_DEFAULT
+                + "\")" );
+        System.out.println( "  -" + EXTRA_PROCESSING1_KEEP_EXTRA_OPTION
+                + "               : to keep extra annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1.Q16611\")" );
+        System.out.println( "  -" + VERBOSE_OPTION + "                : verbose" );
         System.out.println();
         System.out.println( "Examples:" );
         System.out.println();
         System.out.println( " " + PRG_NAME + " my_tree.nh result.tsv" );
         System.out.println( " " + PRG_NAME + " -c=0.5 -s=. my_tree.nh result.tsv" );
         System.out.println( " " + PRG_NAME + " -c=0.9 -s=_ -m=map.tsv my_tree.nh result.tsv" );
+        System.out.println( " " + PRG_NAME + " -x -xs=& -xk my_tree.nh result.tsv" );
+        System.out.println( " " + PRG_NAME + " -x -xs=\"|\" my_tree.nh result.tsv" );
         System.out.println();
     }
 }
index d920d77..2654b82 100644 (file)
@@ -33,52 +33,56 @@ package org.forester.clade_analysis;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
+import java.util.SortedMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Confidence;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.util.ForesterUtil;
+import org.forester.util.UserException;
 
 public final class AnalysisMulti {
 
-    private final static String UNKNOWN = "?";
-    public final static double DEFAULT_CUTOFF_FOR_SPECIFICS = 0.5;
-    public final static String DEFAULT_SEPARATOR = ".";
+    private final static String UNKNOWN                                = "?";
+    public final static double  DEFAULT_CUTOFF_FOR_SPECIFICS           = 0.5;
+    public final static String  DEFAULT_SEPARATOR                      = ".";
     public final static Pattern DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE = Pattern.compile( ".+#\\d+_M=(.+)" );
-    
 
-    public static ResultMulti execute( final Phylogeny p ) {
+    public static ResultMulti execute( final Phylogeny p ) throws UserException {
         return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, DEFAULT_CUTOFF_FOR_SPECIFICS );
     }
-    
-    public static ResultMulti execute( final Phylogeny p, final String separator ) {
-        return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator , DEFAULT_CUTOFF_FOR_SPECIFICS );
+
+    public static ResultMulti execute( final Phylogeny p, final String separator ) throws UserException {
+        return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator, DEFAULT_CUTOFF_FOR_SPECIFICS );
     }
-    
-    public static ResultMulti execute( final Phylogeny p, final String separator,  final double cutoff_for_specifics ) {
-        return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator , cutoff_for_specifics );
+
+    public static ResultMulti execute( final Phylogeny p, final String separator, final double cutoff_for_specifics )
+            throws UserException {
+        return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator, cutoff_for_specifics );
     }
-    
-    public static ResultMulti execute( final Phylogeny p, final double cutoff_for_specifics ) {
-        return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR , cutoff_for_specifics );
+
+    public static ResultMulti execute( final Phylogeny p, final double cutoff_for_specifics ) throws UserException {
+        return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, cutoff_for_specifics );
     }
 
     public static ResultMulti execute( final Phylogeny p,
-                                   final Pattern query,
-                                   final String separator,
-                                   final double cutoff_for_specifics ) {
+                                       final Pattern query,
+                                       final String separator,
+                                       final double cutoff_for_specifics )
+            throws UserException {
+        cleanUpExternalNames( p, separator );
         final List<PhylogenyNode> qnodes = p.getNodes( query );
         final ResultMulti res = new ResultMulti();
         for( int i = 0; i < qnodes.size(); ++i ) {
             final PhylogenyNode qnode = qnodes.get( i );
-            //System.out.println( ">>" + qnode.getName() );
             if ( qnode.isRoot() ) {
-                throw new IllegalArgumentException( "Query " + query + " is root." );
+                throw new UserException( "query " + query + " is root" );
             }
             if ( qnode.getParent().isRoot() ) {
-                throw new IllegalArgumentException( "Parent of query " + query + " is root." );
+                throw new UserException( "parent of query " + query + " is root" );
             }
             PhylogenyNode qnode_p = qnode.getParent();
             PhylogenyNode qnode_pp = qnode.getParent().getParent();
@@ -92,17 +96,13 @@ public final class AnalysisMulti {
             final List<String> qnode_ext_nodes_names = new ArrayList<>();
             for( final PhylogenyNode qnode_ext_node : qnode_pp.getAllExternalDescendants() ) {
                 final String name = qnode_ext_node.getName();
-                if ( ForesterUtil.isEmptyTrimmed( name ) ) {
-                    throw new IllegalArgumentException( "external node(s) with empty names found" );
-                }
                 final Matcher m = query.matcher( name );
                 if ( !m.find() ) {
                     qnode_ext_nodes_names.add( name );
                 }
             }
             final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
-            //  System.out.println( greatest_common_prefix );
-            Matcher matcher = query.matcher( qnode.getName() );
+            final Matcher matcher = query.matcher( qnode.getName() );
             String conf_str = null;
             if ( matcher.find() ) {
                 conf_str = matcher.group( 1 );
@@ -117,9 +117,7 @@ public final class AnalysisMulti {
             else {
                 res.addGreatestCommonPrefix( UNKNOWN, conf );
             }
-            //final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator, query, res );
             final String greatest_common_prefix_up = analyzeSiblings( qnode_p, qnode_pp, separator, query );
-            //System.out.println( "greatest_common_prefix_up=" + greatest_common_prefix_up + " " + conf );
             if ( !ForesterUtil.isEmpty( greatest_common_prefix_up ) ) {
                 res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf );
             }
@@ -127,7 +125,6 @@ public final class AnalysisMulti {
                 res.addGreatestCommonPrefixUp( UNKNOWN, conf );
             }
             final String greatest_common_prefix_down = analyzeSiblings( qnode, qnode_p, separator, query );
-           // System.out.println( "greatest_common_prefix_down=" + greatest_common_prefix_down + " " + conf );
             if ( !ForesterUtil.isEmpty( greatest_common_prefix_down ) ) {
                 res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf );
             }
@@ -139,6 +136,40 @@ public final class AnalysisMulti {
         return res;
     }
 
+    /**
+     * Validates and normalizes the names of all external nodes of a phylogeny.
+     * <p>
+     * Each external node name is trimmed and then rejected with a
+     * {@link UserException} if it is reported empty by
+     * {@code ForesterUtil.isEmpty}, starts or ends with the separator, contains
+     * the separator directly followed or directly preceded by whitespace, or
+     * contains two consecutive separators (an empty annotation level). A name
+     * passing all checks is written back to its node with every run of
+     * whitespace replaced by a single space.
+     *
+     * @param p the phylogeny whose external node names are checked and rewritten in place
+     * @param separator the annotation-separator, matched as a literal string
+     * @throws UserException if an external node name violates one of the rules above
+     */
+    private final static void cleanUpExternalNames( final Phylogeny p, final String separator ) throws UserException {
+        // Pattern.quote() keeps the separator literal even when it contains "\E",
+        // which would prematurely terminate a hand-written \Q...\E quotation.
+        final String quoted_sep = Pattern.quote( separator );
+        final Pattern sep_then_space = Pattern.compile( quoted_sep + "\\s+" );
+        final Pattern space_then_sep = Pattern.compile( "\\s+" + quoted_sep );
+        final Pattern double_sep = Pattern.compile( quoted_sep + quoted_sep );
+        final PhylogenyNodeIterator it = p.iteratorExternalForward();
+        while ( it.hasNext() ) {
+            final PhylogenyNode node = it.next();
+            final String name = node.getName().trim();
+            if ( ForesterUtil.isEmpty( name ) ) {
+                throw new UserException( "external node(s) with empty annotation found" );
+            }
+            if ( name.endsWith( separator ) ) {
+                throw new UserException( "illegally formatted annotation found: annotations cannot end with separator: "
+                        + name );
+            }
+            if ( name.startsWith( separator ) ) {
+                throw new UserException( "illegally formatted annotation found: annotations cannot start with separator: "
+                        + name );
+            }
+            if ( sep_then_space.matcher( name ).find() ) {
+                throw new UserException( "illegally formatted annotation found: separator followed by whitespace: "
+                        + name );
+            }
+            if ( space_then_sep.matcher( name ).find() ) {
+                throw new UserException( "illegally formatted annotation found: whitespace followed by separator: "
+                        + name );
+            }
+            if ( double_sep.matcher( name ).find() ) {
+                throw new UserException( "illegally formatted annotation found: empty annotation level: " + name );
+            }
+            // Collapse internal whitespace runs so equal annotations compare equal.
+            node.setName( name.replaceAll( "\\s+", " " ) );
+        }
+    }
+
     private final static String analyzeSiblings( final PhylogenyNode child,
                                                  final PhylogenyNode parent,
                                                  final String separator,
@@ -146,23 +177,16 @@ public final class AnalysisMulti {
         final int child_index = child.getChildNodeIndex();
         final List<String> ext_nodes_names = new ArrayList<>();
         final List<PhylogenyNode> descs = parent.getDescendants();
-        // String conf = null;
         for( int i = 0; i < descs.size(); ++i ) {
             if ( i != child_index ) {
                 final PhylogenyNode d = descs.get( i );
                 for( final PhylogenyNode n : d.getAllExternalDescendants() ) {
                     final String name = n.getName();
-                    if ( ForesterUtil.isEmptyTrimmed( name ) ) {
-                        throw new IllegalArgumentException( "external node(s) with empty names found" );
-                    }
                     final Matcher m = query.matcher( name );
                     if ( !m.find() ) {
                         ext_nodes_names.add( name );
                     }
                 }
-                // if ( descs.size() == 2 ) {
-                //     conf = obtainConfidence( d );
-                // }
             }
         }
         final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
@@ -170,7 +194,7 @@ public final class AnalysisMulti {
     }
 
     private final static String obtainConfidence( final PhylogenyNode n ) {
-        if ( n.getBranchData().getConfidences() != null && n.getBranchData().getConfidences().size() > 0 ) {
+        if ( ( n.getBranchData().getConfidences() != null ) && ( n.getBranchData().getConfidences().size() > 0 ) ) {
             final List<Confidence> confidences = n.getBranchData().getConfidences();
             boolean not_first = false;
             Collections.sort( confidences );
@@ -192,4 +216,86 @@ public final class AnalysisMulti {
         }
         return null;
     }
+
+    /**
+     * Replaces the names of external nodes with annotations looked up in a map.
+     * <p>
+     * External nodes whose trimmed name matches {@code pattern} are left
+     * untouched. Every other external node must have its trimmed name present
+     * as a key in {@code map}; its name is then set to the trimmed mapped
+     * value. When {@code verbose} is set, each replacement is printed to
+     * standard output as {@code old -> new}, framed by a heading and blank
+     * lines.
+     *
+     * @param pattern the query pattern; matching node names are skipped
+     * @param map the mapping from node names to annotations
+     * @param p the phylogeny whose external nodes are renamed in place
+     * @param verbose whether to print each performed replacement
+     * @throws UserException if an external node has an empty name or a
+     *         non-matching name has no entry in {@code map}
+     */
+    public final static void performMapping( final Pattern pattern,
+                                             final SortedMap<String, String> map,
+                                             final Phylogeny p,
+                                             final boolean verbose )
+            throws UserException {
+        if ( verbose ) {
+            System.out.println();
+            System.out.println( "Id to annotation mapping:" );
+        }
+        for( final PhylogenyNodeIterator ext_it = p.iteratorExternalForward(); ext_it.hasNext(); ) {
+            final PhylogenyNode ext_node = ext_it.next();
+            final String id = ext_node.getName().trim();
+            if ( ForesterUtil.isEmpty( id ) ) {
+                throw new UserException( "external node with empty name found" );
+            }
+            if ( pattern.matcher( id ).find() ) {
+                // Nodes matching the query pattern keep their names.
+                continue;
+            }
+            if ( !map.containsKey( id ) ) {
+                throw new UserException( "no mapping for \"" + id + "\" found" );
+            }
+            final String annotation = map.get( id ).trim();
+            ext_node.setName( annotation );
+            if ( verbose ) {
+                System.out.println( id + " -> " + ext_node.getName() );
+            }
+        }
+        if ( verbose ) {
+            System.out.println();
+        }
+    }
+
+    /**
+     * Strips (or relocates) a leading "extra" part from external node names.
+     * <p>
+     * External nodes whose trimmed name matches {@code pattern}, or whose name
+     * does not contain {@code extra_sep}, are left untouched. For all other
+     * external nodes the name is split at the <em>last</em> occurrence of
+     * {@code extra_sep}: the part after it is the annotation, the part before
+     * it is the extra. The node is renamed to the annotation alone or, if
+     * {@code keep} is set and the extra is not empty, to
+     * {@code annotation + annotation_sep + extra} (e.g. with {@code "|"} and
+     * {@code "."}, {@code "Q16611|A.1.1"} becomes {@code "A.1.1"} or
+     * {@code "A.1.1.Q16611"}). When {@code verbose} is set, each replacement is
+     * printed to standard output as {@code old -> new}.
+     *
+     * @param pattern the query pattern; matching node names are skipped
+     * @param p the phylogeny whose external nodes are renamed in place
+     * @param extra_sep the separator between extra and annotation; may be longer than one character
+     * @param keep whether to append the extra to the annotation instead of dropping it
+     * @param annotation_sep the separator placed between annotation and kept extra
+     * @param verbose whether to print each performed replacement
+     * @throws UserException if an external node has an empty name or nothing follows the last {@code extra_sep}
+     */
+    public final static void performExtraProcessing1( final Pattern pattern,
+                                                      final Phylogeny p,
+                                                      final String extra_sep,
+                                                      final boolean keep,
+                                                      final String annotation_sep,
+                                                      final boolean verbose )
+            throws UserException {
+        if ( verbose ) {
+            System.out.println();
+            System.out.println( "Extra annotation processing:" );
+        }
+        final PhylogenyNodeIterator it = p.iteratorExternalForward();
+        while ( it.hasNext() ) {
+            final PhylogenyNode node = it.next();
+            final String name = node.getName().trim();
+            if ( ForesterUtil.isEmpty( name ) ) {
+                throw new UserException( "external node with empty name found" );
+            }
+            if ( pattern.matcher( name ).find() ) {
+                // Query nodes are never rewritten.
+                continue;
+            }
+            final int last_index = name.lastIndexOf( extra_sep );
+            if ( last_index < 0 ) {
+                // No extra part present; leave the name as it is.
+                continue;
+            }
+            // Skip the whole separator, not just one character, so that
+            // multi-character separators do not leak into the annotation.
+            final String annotation = name.substring( last_index + extra_sep.length() ).trim();
+            if ( ForesterUtil.isEmptyTrimmed( annotation ) ) {
+                throw new UserException( "illegal format:" + name );
+            }
+            final StringBuilder sb = new StringBuilder( annotation );
+            if ( keep ) {
+                final String extra = name.substring( 0, last_index ).trim();
+                if ( !ForesterUtil.isEmpty( extra ) ) {
+                    sb.append( annotation_sep );
+                    sb.append( extra );
+                }
+            }
+            node.setName( sb.toString() );
+            if ( verbose ) {
+                System.out.println( name + " -> " + node.getName() );
+            }
+        }
+        if ( verbose ) {
+            System.out.println();
+        }
+    }
 }
index 0fcda8b..e3b102e 100644 (file)
@@ -36,6 +36,7 @@ import java.util.SortedMap;
 import java.util.TreeMap;
 
 import org.forester.util.ForesterUtil;
+import org.forester.util.UserException;
 
 public final class ResultMulti {
 
@@ -178,7 +179,7 @@ public final class ResultMulti {
         _greatest_common_prefixes_down.add( new Prefix( prefix_down, confidence, _separator ) );
     }
 
-    final void analyze( final double cutoff_for_specifics ) {
+    final void analyze( final double cutoff_for_specifics ) throws UserException {
         reset();
         analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff_for_specifics );
         analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, cutoff_for_specifics );
@@ -202,7 +203,8 @@ public final class ResultMulti {
 
     private final void analyzeGreatestCommonPrefixes( final List<Prefix> greatest_common_prefixes,
                                                       final String separator,
-                                                      final double cutoff ) {
+                                                      final double cutoff )
+            throws UserException {
         final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes, separator );
         if ( !ForesterUtil.isEmpty( l ) ) {
             sortPrefixesAccordingToConfidence( l );
@@ -220,7 +222,8 @@ public final class ResultMulti {
 
     private final void analyzeGreatestCommonPrefixesUp( final List<Prefix> greatest_common_prefixes_up,
                                                         final String separator,
-                                                        final double cutoff ) {
+                                                        final double cutoff )
+            throws UserException {
         final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes_up, separator );
         if ( !ForesterUtil.isEmpty( l ) ) {
             sortPrefixesAccordingToConfidence( l );
@@ -238,7 +241,8 @@ public final class ResultMulti {
 
     final void analyzeGreatestCommonPrefixesDown( final List<Prefix> greatest_common_prefixes_down,
                                                   final String separator,
-                                                  final double cutoff ) {
+                                                  final double cutoff )
+            throws UserException {
         final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes_down, separator );
         if ( !ForesterUtil.isEmpty( l ) ) {
             sortPrefixesAccordingToConfidence( l );
@@ -283,7 +287,7 @@ public final class ResultMulti {
         return cleaned_spec;
     }
 
-    private final static List<Prefix> collapse( final List<Prefix> cleaned ) {
+    private final static List<Prefix> collapse( final List<Prefix> cleaned ) throws UserException {
         final List<Prefix> collapsed = new ArrayList<>();
         final Set<String> firsts = new HashSet<>();
         double confidence_sum = 0;
@@ -296,7 +300,7 @@ public final class ResultMulti {
             }
         }
         if ( !ForesterUtil.isEqual( confidence_sum, 1.0, 1E-5 ) ) {
-            throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" );
+            throw new UserException( "confidences add up to " + confidence_sum + " instead of 1.0" );
         }
         return collapsed;
     }
diff --git a/forester/java/src/org/forester/util/UserException.java b/forester/java/src/org/forester/util/UserException.java
new file mode 100644 (file)
index 0000000..7136b3f
--- /dev/null
@@ -0,0 +1,15 @@
+
+package org.forester.util;
+
+/**
+ * Checked exception for problems that are reported back to the user as an
+ * error message (such as illegally formatted annotations or missing
+ * mappings) rather than treated as unexpected program failures.
+ */
+public class UserException extends Exception {
+
+    private static final long serialVersionUID = -4607485599991434499L;
+
+    /**
+     * Creates an exception with a message intended for the user.
+     *
+     * @param message the description of the problem
+     */
+    public UserException( final String message ) {
+        super( message );
+    }
+
+    /**
+     * Creates an exception with a message intended for the user while
+     * preserving the underlying cause for diagnostics.
+     *
+     * @param message the description of the problem
+     * @param cause the exception which led to this one, may be {@code null}
+     */
+    public UserException( final String message, final Throwable cause ) {
+        super( message, cause );
+    }
+
+    /**
+     * Creates an exception wrapping an underlying cause.
+     *
+     * @param cause the exception which led to this one, may be {@code null}
+     */
+    public UserException( final Throwable cause ) {
+        super( cause );
+    }
+
+    /**
+     * Creates an exception without message and cause.
+     */
+    public UserException() {
+        super();
+    }
+}