X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fcladinator.java;h=9d84bc332ffbcabe8527a90c89d640580b3d5e4d;hb=0f6e7d07eb1a625dd7ff3df0da406cafd4850fea;hp=d7f6e48699a8b287dada13c8a5f919c853725cd8;hpb=efaea5057ced91f5d162cebb6459d206da1d3c82;p=jalview.git diff --git a/forester/java/src/org/forester/application/cladinator.java b/forester/java/src/org/forester/application/cladinator.java index d7f6e48..9d84bc3 100644 --- a/forester/java/src/org/forester/application/cladinator.java +++ b/forester/java/src/org/forester/application/cladinator.java @@ -52,8 +52,8 @@ import org.forester.util.UserException; public final class cladinator { final static private String PRG_NAME = "cladinator"; - final static private String PRG_VERSION = "1.02"; - final static private String PRG_DATE = "170912"; + final static private String PRG_VERSION = "1.07"; + final static private String PRG_DATE = "1711xx"; final static private String PRG_DESC = "clades within clades of annotated labels -- analysis of pplacer-type outputs"; final static private String E_MAIL = "phyloxml@gmail.com"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; @@ -66,13 +66,16 @@ public final class cladinator { final static private String EXTRA_PROCESSING_OPTION1 = "x"; final static private String EXTRA_PROCESSING1_SEP_OPTION = "xs"; final static private String EXTRA_PROCESSING1_KEEP_EXTRA_OPTION = "xk"; + final static private String QUIET_OPTION = "Q"; + final static private String SPECIAL_PROCESSING_OPTION = "S"; final static private String VERBOSE_OPTION = "v"; - final static private double SPECIFICS_CUTOFF_DEFAULT = 0.8; + final static private String REMOVE_ANNOT_SEP_OPTION = "rs"; + final static private double SPECIFICS_CUTOFF_DEFAULT = 0.7; final static private String SEP_DEFAULT = "."; final static private Pattern QUERY_PATTERN_DEFAULT = AnalysisMulti.DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE; final static private String EXTRA_PROCESSING1_SEP_DEFAULT = "|"; final static private boolean EXTRA_PROCESSING1_KEEP_EXTRA_DEFAULT = false; - private final static DecimalFormat df = new DecimalFormat( "0.0#######" ); + private final static DecimalFormat df = new DecimalFormat( "0.0###" ); public static void main( final String args[] ) { try { @@ -99,7 +102,7 @@ public final class cladinator { print_help(); System.exit( -1 ); } - final List allowed_options = new ArrayList<>(); + final List allowed_options = new ArrayList(); allowed_options.add( SEP_OPTION ); allowed_options.add( QUERY_PATTERN_OPTION ); allowed_options.add( SPECIFICS_CUTOFF_OPTION ); @@ -107,7 +110,10 @@ public final class cladinator { allowed_options.add( EXTRA_PROCESSING_OPTION1 ); allowed_options.add( EXTRA_PROCESSING1_SEP_OPTION ); allowed_options.add( EXTRA_PROCESSING1_KEEP_EXTRA_OPTION ); + allowed_options.add( SPECIAL_PROCESSING_OPTION ); allowed_options.add( VERBOSE_OPTION ); + allowed_options.add( QUIET_OPTION ); + allowed_options.add( REMOVE_ANNOT_SEP_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); @@ -133,15 +139,16 @@ public final class cladinator { ForesterUtil.fatalError( PRG_NAME, "no value for separator option" ); } } - Pattern compiled_query_str = null; + Pattern compiled_query = null; if ( cla.isOptionSet( QUERY_PATTERN_OPTION ) ) { if ( cla.isOptionValueSet( QUERY_PATTERN_OPTION ) ) { final String query_str = cla.getOptionValue( QUERY_PATTERN_OPTION ); try { - compiled_query_str = Pattern.compile( query_str ); + compiled_query = Pattern.compile( query_str ); } catch ( final PatternSyntaxException e ) { - ForesterUtil.fatalError( PRG_NAME, "error in regular expression: " + e.getMessage() ); + ForesterUtil.fatalError( PRG_NAME, + "error in regular expression: " + query_str + ": " + e.getMessage() ); } } else { @@ -162,7 +169,7 @@ public final class cladinator { ForesterUtil.fatalError( PRG_NAME, "no value for mapping file" ); } } - final Pattern pattern = ( compiled_query_str != null ) ? compiled_query_str : QUERY_PATTERN_DEFAULT; + final Pattern pattern = ( compiled_query != null ) ? compiled_query : QUERY_PATTERN_DEFAULT; final File intreefile = cla.getFile( 0 ); final String error_intreefile = ForesterUtil.isReadableFile( intreefile ); if ( !ForesterUtil.isEmpty( error_intreefile ) ) { @@ -215,7 +222,7 @@ public final class cladinator { ForesterUtil.fatalError( PRG_NAME, "no value for extra processing separator" ); } } - if ( extra_processing1_sep != null && extra_processing1_sep.equals( separator ) ) { + if ( ( extra_processing1_sep != null ) && extra_processing1_sep.equals( separator ) ) { ForesterUtil.fatalError( PRG_NAME, "extra processing separator must not be the same the annotation-separator" ); } @@ -228,6 +235,37 @@ public final class cladinator { } extra_processing1_keep = true; } + Pattern special_pattern = null; + boolean special_processing = false; + if ( cla.isOptionSet( SPECIAL_PROCESSING_OPTION ) ) { + if ( extra_processing1 == true ) { + ForesterUtil + .fatalError( PRG_NAME, + "extra processing cannot be used together with special processing pattern" ); + } + if ( cla.isOptionValueSet( SPECIAL_PROCESSING_OPTION ) ) { + final String str = cla.getOptionValue( SPECIAL_PROCESSING_OPTION ); + try { + special_pattern = Pattern.compile( str ); + } + catch ( final PatternSyntaxException e ) { + ForesterUtil + .fatalError( PRG_NAME, + "error in special processing pattern: " + str + ": " + e.getMessage() ); + } + special_processing = true; + } + else { + ForesterUtil.fatalError( PRG_NAME, "no value for special processing pattern" ); + } + } + final boolean remove_annotation_sep; + if ( cla.isOptionSet( REMOVE_ANNOT_SEP_OPTION ) ) { + remove_annotation_sep = true; + } + else { + remove_annotation_sep = false; + } final boolean verbose; if ( cla.isOptionSet( VERBOSE_OPTION ) ) { verbose = true; @@ -235,6 +273,13 @@ public final class cladinator { else { verbose = false; } + final boolean quit; + if ( cla.isOptionSet( QUIET_OPTION ) ) { + quit = true; + } + else { + quit = false; + } System.out.println( "Input tree : " + intreefile ); System.out.println( "Specific-hit support cutoff: " + cutoff_specifics ); if ( mapping_file != null ) { @@ -242,41 +287,85 @@ public final class cladinator { + " rows)" ); } System.out.println( "Annotation-separator : " + separator ); + if ( remove_annotation_sep ) { + System.out.println( "Remove anno.-sep. in output: " + remove_annotation_sep ); + } System.out.println( "Query pattern : " + pattern ); - System.out.println( "Extra processing : " + extra_processing1 ); if ( extra_processing1 ) { + System.out.println( "Extra processing : " + extra_processing1 ); System.out.println( "Extra processing separator : " + extra_processing1_sep ); System.out.println( "Keep extra annotations : " + extra_processing1_keep ); } + if ( special_processing ) { + System.out.println( "Special processing : " + special_processing ); + System.out.println( "Special processing pattern : " + special_pattern ); + } if ( outtablefile != null ) { System.out.println( "Output table : " + outtablefile ); } - Phylogeny p = null; + Phylogeny phys[] = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( intreefile, true ); - p = factory.create( intreefile, pp )[ 0 ]; + phys = factory.create( intreefile, pp ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "Could not read \"" + intreefile + "\" [" + e.getMessage() + "]" ); - System.exit( -1 ); } - System.out.println( "Ext. nodes in input tree : " + p.getNumberOfExternalNodes() ); - if ( map != null ) { - AnalysisMulti.performMapping( pattern, map, p, verbose ); + if ( phys.length == 0 ) { + ForesterUtil.fatalError( PRG_NAME, "\"" + intreefile + "\" does not contain any trees" ); } - if ( extra_processing1 ) { - AnalysisMulti.performExtraProcessing1( pattern, - p, - extra_processing1_sep, - extra_processing1_keep, - separator, - verbose ); - } - final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, cutoff_specifics ); - printResult( res ); + System.out.println( "Number of input trees : " + phys.length ); + if ( phys.length == 1 ) { + System.out.println( "Ext. nodes in input tree : " + phys[ 0 ].getNumberOfExternalNodes() ); + } + else { + System.out.println( "Ext. nodes in input tree 1 : " + phys[ 0 ].getNumberOfExternalNodes() ); + } + final EasyWriter outtable_writer; if ( outtablefile != null ) { - writeResultToTable( res, outtablefile ); + outtable_writer = ForesterUtil.createEasyWriter( outtablefile ); + outtable_writer.print( "#" + PRG_NAME + " " + PRG_VERSION + " " + PRG_DATE ); + outtable_writer.print( " Input tree: " + intreefile ); + outtable_writer.println( " Specific-hit support cutoff: " + cutoff_specifics ); + } + else { + outtable_writer = null; + } + int counter = 0; + for( final Phylogeny phy : phys ) { + if ( map != null ) { + AnalysisMulti.performMapping( pattern, map, phy, verbose ); + } + if ( extra_processing1 ) { + AnalysisMulti.performExtraProcessing1( pattern, + phy, + extra_processing1_sep, + extra_processing1_keep, + separator, + verbose ); + } + else if ( special_processing ) { + AnalysisMulti.performSpecialProcessing1( pattern, phy, separator, special_pattern, verbose ); + } + final ResultMulti res = AnalysisMulti.execute( phy, pattern, separator, cutoff_specifics ); + if ( !quit ) { + if ( phys.length == 1 ) { + printResult( res, -1, remove_annotation_sep ); + } + else { + printResult( res, counter, remove_annotation_sep ); + } + } + if ( outtable_writer != null ) { + writeResultToTable( res, outtable_writer, remove_annotation_sep ); + outtable_writer.flush(); + } + ++counter; + } + if ( outtable_writer != null ) { + outtable_writer.flush(); + outtable_writer.close(); } } catch ( final UserException e ) { @@ -291,134 +380,228 @@ public final class cladinator { } } - private final static void printResult( final ResultMulti res ) { - System.out.println(); - System.out.println( "Result:" ); + private final static void printResult( final ResultMulti res, + final int counter, + final boolean remove_annotation_sep ) { System.out.println(); + if ( counter == -1 ) { + System.out.println( "Result for " + res.getQueryNamePrefix() ); + } + else { + System.out.println( "Result for " + res.getQueryNamePrefix() + " [tree " + counter + "]" ); + } if ( ( res.getAllMultiHitPrefixes() == null ) | ( res.getAllMultiHitPrefixes().size() < 1 ) ) { - System.out.println( "No match to query pattern!" ); + System.out.println( " No match to query pattern!" ); } else { - System.out.println( "Matching Clade(s):" ); + System.out.println( " Matching Clade(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { - System.out.println( prefix ); + if ( remove_annotation_sep ) { + System.out.println( " " + prefix.toStringRemovSeparator() ); + } + else { + System.out.println( " " + prefix ); + } } if ( res.isHasSpecificMultiHitsPrefixes() ) { System.out.println(); - System.out.println( "Specific-hit(s):" ); + System.out.println( " Specific-hit(s):" ); for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) { - System.out.println( prefix ); + if ( remove_annotation_sep ) { + System.out.println( " " + prefix.toStringRemovSeparator() ); + } + else { + System.out.println( " " + prefix ); + } } System.out.println(); - System.out.println( "Matching Clade(s) with Specific-hit(s):" ); + System.out.println( " Matching Clade(s) with Specific-hit(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { - System.out.println( prefix ); + if ( remove_annotation_sep ) { + System.out.println( " " + prefix.toStringRemovSeparator() ); + } + else { + System.out.println( " " + prefix ); + } for( final Prefix spec : res.getSpecificMultiHitPrefixes() ) { if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) { - System.out.println( " " + spec ); + if ( remove_annotation_sep ) { + System.out.println( " " + spec.toStringRemovSeparator() ); + } + else { + System.out.println( " " + spec ); + } } } } } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) { System.out.println(); - System.out.println( "Matching Down-tree Bracketing Clade(s):" ); + System.out.println( " Matching Down-tree Bracketing Clade(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) { - System.out.println( prefix ); + if ( remove_annotation_sep ) { + System.out.println( " " + prefix.toStringRemovSeparator() ); + } + else { + System.out.println( " " + prefix ); + } } } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) { System.out.println(); - System.out.println( "Matching Up-tree Bracketing Clade(s):" ); + System.out.println( " Matching Up-tree Bracketing Clade(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) { - System.out.println( prefix ); + if ( remove_annotation_sep ) { + System.out.println( " " + prefix.toStringRemovSeparator() ); + } + else { + System.out.println( " " + prefix ); + } } } + System.out.println(); + System.out.println( " Total Number of Matches: " + res.getNumberOfMatches() + "/" + + res.getReferenceTreeNumberOfExternalNodes() ); } System.out.println(); } - private final static void writeResultToTable( final ResultMulti res, final File outtablefile ) throws IOException { - final EasyWriter w = ForesterUtil.createEasyWriter( outtablefile ); + private final static void writeResultToTable( final ResultMulti res, + final EasyWriter w, + final boolean remove_annotation_sep ) + throws IOException { if ( ( res.getAllMultiHitPrefixes() == null ) | ( res.getAllMultiHitPrefixes().size() < 1 ) ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.println( "No match to query pattern!" ); } else { for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.print( "Matching Clades" ); w.print( "\t" ); - w.print( prefix.getPrefix() ); + if ( remove_annotation_sep ) { + w.print( prefix.getPrefixRemovSeparator() ); + } + else { + w.print( prefix.getPrefix() ); + } w.print( "\t" ); w.print( df.format( prefix.getConfidence() ) ); + w.print( "\t" ); + w.print( String.valueOf( res.getNumberOfMatches() ) ); + w.print( "\t" ); + w.print( String.valueOf( res.getReferenceTreeNumberOfExternalNodes() ) ); w.println(); } if ( res.isHasSpecificMultiHitsPrefixes() ) { for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.print( "Specific-hits" ); w.print( "\t" ); - w.print( prefix.getPrefix() ); + if ( remove_annotation_sep ) { + w.print( prefix.getPrefixRemovSeparator() ); + } + else { + w.print( prefix.getPrefix() ); + } w.print( "\t" ); w.print( df.format( prefix.getConfidence() ) ); + w.print( "\t" ); + w.print( String.valueOf( res.getNumberOfMatches() ) ); + w.print( "\t" ); + w.print( String.valueOf( res.getReferenceTreeNumberOfExternalNodes() ) ); w.println(); } } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) { for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.print( "Matching Down-tree Bracketing Clades" ); w.print( "\t" ); - w.print( prefix.getPrefix() ); + if ( remove_annotation_sep ) { + w.print( prefix.getPrefixRemovSeparator() ); + } + else { + w.print( prefix.getPrefix() ); + } w.print( "\t" ); w.print( df.format( prefix.getConfidence() ) ); + w.print( "\t" ); + w.print( String.valueOf( res.getNumberOfMatches() ) ); + w.print( "\t" ); + w.print( String.valueOf( res.getReferenceTreeNumberOfExternalNodes() ) ); w.println(); } } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) { for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.print( "Matching Up-tree Bracketing Clades" ); w.print( "\t" ); - w.print( prefix.getPrefix() ); + if ( remove_annotation_sep ) { + w.print( prefix.getPrefixRemovSeparator() ); + } + else { + w.print( prefix.getPrefix() ); + } w.print( "\t" ); w.print( df.format( prefix.getConfidence() ) ); + w.print( "\t" ); + w.print( String.valueOf( res.getNumberOfMatches() ) ); + w.print( "\t" ); + w.print( String.valueOf( res.getReferenceTreeNumberOfExternalNodes() ) ); w.println(); } } } - w.flush(); - w.close(); } private final static void print_help() { System.out.println( "Usage:" ); System.out.println(); - System.out.println( PRG_NAME + " [options] [output table file]" ); + System.out.println( PRG_NAME + " [options] [output table file]" ); System.out.println(); System.out.println( " options:" ); System.out.println( " -" + SPECIFICS_CUTOFF_OPTION - + "= : the cutoff for \"specific-hit\" support values (default: " + + "= : the minimal confidence value for \"specific-hits\" to be reported (default: " + SPECIFICS_CUTOFF_DEFAULT + ")" ); - System.out.println( " -" + SEP_OPTION + "= : the annotation-separator to be used (default: " - + SEP_DEFAULT + ")" ); + System.out.println( " -" + SEP_OPTION + "= : the annotation-separator to be used (default: \"" + + SEP_DEFAULT + "\")" ); System.out.println( " -" + MAPPING_FILE_OPTION - + "=: to map node names to appropriate annotations (tab-separated, two columns) (default: no mapping)" ); - System.out.println( " -" + QUERY_PATTERN_OPTION - + "=: the regular expression for the query (default: \"" + QUERY_PATTERN_DEFAULT - + "\" for pplacer output)" ); + + "= : to map node names to appropriate annotations (tab-separated, two columns) (default: no mapping)" ); System.out.println( " -" + EXTRA_PROCESSING_OPTION1 - + " : to enable extra processing of annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1\")" ); + + " : to enable extra processing of annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1\")" ); System.out.println( " -" + EXTRA_PROCESSING1_SEP_OPTION - + "= : the separator for extra annotations (default: \"" + EXTRA_PROCESSING1_SEP_DEFAULT + + "= : the separator for extra annotations (default: \"" + EXTRA_PROCESSING1_SEP_DEFAULT + "\")" ); System.out.println( " -" + EXTRA_PROCESSING1_KEEP_EXTRA_OPTION - + " : to keep extra annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1.Q16611\")" ); - System.out.println( " -" + VERBOSE_OPTION + " : verbose" ); + + " : to keep extra annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1.Q16611\")" ); + System.out.println( " -" + SPECIAL_PROCESSING_OPTION + + "= : special processing with pattern (e.g. \"(\\d+)([a-z]+)_.+\" for changing \"6q_EF42\" to \"6.q\")" ); + System.out.println( " -" + REMOVE_ANNOT_SEP_OPTION + + " : to remove the annotation-separator in the output (e.g. the \"" + SEP_DEFAULT + + "\")" ); + System.out.println( " -" + VERBOSE_OPTION + " : verbose" ); + System.out.println( " -" + QUIET_OPTION + + " : quiet (no output to console, for when used in a pipeline)" ); + System.out.println( " --" + QUERY_PATTERN_OPTION + + "= : expert option: the regular expression pattern for the query (default: \"" + + QUERY_PATTERN_DEFAULT + "\" for pplacer output)" ); System.out.println(); System.out.println( "Examples:" ); System.out.println(); - System.out.println( " " + PRG_NAME + " my_tree.nh result.tsv" ); - System.out.println( " " + PRG_NAME + " -c=0.5 -s=. my_tree.nh result.tsv" ); - System.out.println( " " + PRG_NAME + " -c=0.9 -s=_ -m=map.tsv my_tree.nh result.tsv" ); - System.out.println( " " + PRG_NAME + " -x -xs=& -xk my_tree.nh result.tsv" ); - System.out.println( " " + PRG_NAME + " -x -xs=\"|\" my_tree.nh result.tsv" ); + System.out.println( " " + PRG_NAME + " pp_out_tree.sing.tre result.tsv" ); + System.out.println( " " + PRG_NAME + " -c=0.5 -s=. pp_out_tree.sing.tre result.tsv" ); + System.out.println( " " + PRG_NAME + " -c=0.9 -s=_ -m=map.tsv pp_out_trees.sing.tre result.tsv" ); + System.out.println( " " + PRG_NAME + " -x -xs=& -xk pp_out_trees.sing.tre result.tsv" ); + System.out.println( " " + PRG_NAME + " -x -xs=\"|\" pp_out_trees.sing.tre result.tsv" ); + System.out.println( " " + PRG_NAME + " -x -xk -m=map.tsv pp_out_trees.sing.tre result.tsv" ); + System.out.println( " " + PRG_NAME + " -m=map.tsv -S='(\\d+)([a-z?]*)_.+' pp_out_trees.sing.tre result.tsv" ); System.out.println(); } }