in progress...
authorcmzmasek <chris.zma@outlook.com>
Wed, 20 Sep 2017 18:21:09 +0000 (11:21 -0700)
committercmzmasek <chris.zma@outlook.com>
Wed, 20 Sep 2017 18:21:09 +0000 (11:21 -0700)
forester/java/src/org/forester/application/cladinator.java
forester/java/src/org/forester/clade_analysis/AnalysisMulti.java

index ae8d970..a4b273f 100644 (file)
@@ -53,7 +53,7 @@ public final class cladinator {
 
     final static private String        PRG_NAME                             = "cladinator";
     final static private String        PRG_VERSION                          = "1.05";
-    final static private String        PRG_DATE                             = "170919";
+    final static private String        PRG_DATE                             = "170920";
     final static private String        PRG_DESC                             = "clades within clades of annotated labels -- analysis of pplacer-type outputs";
     final static private String        E_MAIL                               = "phyloxml@gmail.com";
     final static private String        WWW                                  = "https://sites.google.com/site/cmzmasek/home/software/forester";
@@ -233,7 +233,6 @@ public final class cladinator {
                 }
                 extra_processing1_keep = true;
             }
-            
             Pattern special_pattern = null;
             boolean special_processing = false;
             if ( cla.isOptionSet( SPECIAL_PROCESSING_OPTION ) ) {
@@ -516,11 +515,11 @@ public final class cladinator {
         System.out.println( "  -" + EXTRA_PROCESSING1_KEEP_EXTRA_OPTION
                 + "                : to keep extra annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1.Q16611\")" );
         System.out.println( "  -" + SPECIAL_PROCESSING_OPTION
-                + "=<pattern>       : special processing with pattern (e.g. \"(\\d+)([a-z]+)_(.+)\" for \"6q_EF42\" to \"6.q.EF42\")" );
+                + "=<pattern>       : special processing with pattern (e.g. \"(\\d+)([a-z]+)_.+\" for changing \"6q_EF42\" to \"6.q\")" );
         System.out.println( "  -" + VERBOSE_OPTION + "                 : verbose" );
-        System.out.println( "  -" + QUIET_OPTION + "                 : quiet" );
+        System.out.println( "  -" + QUIET_OPTION + "                 : quiet (for when used in a pipeline)" );
         System.out.println( "  --" + QUERY_PATTERN_OPTION
-                + "=<query pattern>: the regular expression for the query (default: \"" + QUERY_PATTERN_DEFAULT
+                + "=<pattern>      : the regular expression pattern for the query (default: \"" + QUERY_PATTERN_DEFAULT
                 + "\" for pplacer output)" );
         System.out.println();
         System.out.println( "Examples:" );
@@ -531,6 +530,7 @@ public final class cladinator {
         System.out.println( " " + PRG_NAME + " -x -xs=& -xk pp_out_trees.sing.tre result.tsv" );
         System.out.println( " " + PRG_NAME + " -x -xs=\"|\" pp_out_trees.sing.tre result.tsv" );
         System.out.println( " " + PRG_NAME + " -x -xk -m=map.tsv pp_out_trees.sing.tre result.tsv" );
+        System.out.println( " " + PRG_NAME + " -m=map.tsv -S='(\\d+)([a-z?]*)_.+' pp_out_trees.sing.tre result.tsv" );
         System.out.println();
     }
 }
index 8c35f93..44f46cd 100644 (file)
@@ -252,7 +252,7 @@ public final class AnalysisMulti {
         }
     }
 
-    public final static void performExtraProcessing1( final Pattern pattern,
+    public final static void performExtraProcessing1( final Pattern query_pattern,
                                                       final Phylogeny p,
                                                       final String extra_sep,
                                                       final boolean keep,
@@ -270,14 +270,13 @@ public final class AnalysisMulti {
             if ( ForesterUtil.isEmpty( name ) ) {
                 throw new UserException( "external node with empty name found" );
             }
-            final Matcher m = pattern.matcher( name );
-            if ( !m.find() ) {
+            if ( !query_pattern.matcher( name ).find() ) {
                 final StringBuilder sb = new StringBuilder();
                 final int last_index = name.lastIndexOf( extra_sep );
                 if ( last_index >= 0 ) {
                     final String annotation = name.substring( last_index + 1 ).trim();
                     if ( ForesterUtil.isEmptyTrimmed( annotation ) ) {
-                        throw new UserException( "illegal format: " + name );
+                        throw new UserException( "illegally formatted annotation: " + name );
                     }
                     if ( keep ) {
                         final String extra = name.substring( 0, last_index ).trim();
@@ -301,8 +300,8 @@ public final class AnalysisMulti {
             System.out.println();
         }
     }
-  
-    public final static void performSpecialProcessing1( final Pattern pattern,
+
+    public final static void performSpecialProcessing1( final Pattern query_pattern,
                                                         final Phylogeny p,
                                                         final String annotation_sep,
                                                         final Pattern special_pattern,
@@ -319,20 +318,23 @@ public final class AnalysisMulti {
             if ( ForesterUtil.isEmpty( name ) ) {
                 throw new UserException( "external node with empty name found" );
             }
-            final Matcher m = pattern.matcher( name );
-            if ( !m.find() ) {
+            if ( !query_pattern.matcher( name ).find() ) {
                 final Matcher special_m = special_pattern.matcher( name );
                 if ( special_m.matches() ) {
                     final int c = special_m.groupCount();
-                    if ( c < 2 ) {
-                        throw new UserException( "illegal special pattern: " + special_pattern );
+                    if ( c < 1 ) {
+                        throw new UserException( "illegal special pattern: " + special_pattern
+                                + " (need at least one capturing group)" );
                     }
                     final StringBuilder sb = new StringBuilder();
-                    for( int i = 1; i < c; ++i ) {
-                        if ( c > 1 ) {
-                            sb.append( annotation_sep );
+                    for( int i = 1; i <= c; ++i ) {
+                        final String g = special_m.group( i );
+                        if ( !ForesterUtil.isEmpty( g ) ) {
+                            if ( i > 1 ) {
+                                sb.append( annotation_sep );
+                            }
+                            sb.append( special_m.group( i ) );
                         }
-                        sb.append( special_m.group( i ) );
                     }
                     node.setName( sb.toString() );
                     if ( verbose ) {
@@ -340,8 +342,8 @@ public final class AnalysisMulti {
                     }
                 }
                 else {
-                    throw new UserException( "illegal format for special processing: " + name + " (expected pattern: "
-                            + special_pattern + ")" );
+                    throw new UserException( "illegally formatted annotation for special processing: " + name
+                            + " (expected pattern: " + special_pattern + ")" );
                 }
             }
         }