From 6e215ced38dfa1d0d0d9739f77285e4ece98349d Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Wed, 13 Sep 2017 12:07:44 -0700 Subject: [PATCH] in progress... --- .../src/org/forester/application/cladinator.java | 128 ++++++++++++-------- .../org/forester/clade_analysis/AnalysisMulti.java | 21 +++- .../src/org/forester/clade_analysis/Prefix.java | 2 +- .../org/forester/clade_analysis/ResultMulti.java | 15 +++ 4 files changed, 115 insertions(+), 51 deletions(-) diff --git a/forester/java/src/org/forester/application/cladinator.java b/forester/java/src/org/forester/application/cladinator.java index d7f6e48..993e295 100644 --- a/forester/java/src/org/forester/application/cladinator.java +++ b/forester/java/src/org/forester/application/cladinator.java @@ -52,8 +52,8 @@ import org.forester.util.UserException; public final class cladinator { final static private String PRG_NAME = "cladinator"; - final static private String PRG_VERSION = "1.02"; - final static private String PRG_DATE = "170912"; + final static private String PRG_VERSION = "1.03"; + final static private String PRG_DATE = "170913"; final static private String PRG_DESC = "clades within clades of annotated labels -- analysis of pplacer-type outputs"; final static private String E_MAIL = "phyloxml@gmail.com"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; @@ -72,7 +72,7 @@ public final class cladinator { final static private Pattern QUERY_PATTERN_DEFAULT = AnalysisMulti.DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE; final static private String EXTRA_PROCESSING1_SEP_DEFAULT = "|"; final static private boolean EXTRA_PROCESSING1_KEEP_EXTRA_DEFAULT = false; - private final static DecimalFormat df = new DecimalFormat( "0.0#######" ); + private final static DecimalFormat df = new DecimalFormat( "0.0###" ); public static void main( final String args[] ) { try { @@ -215,7 +215,7 @@ public final class cladinator { ForesterUtil.fatalError( PRG_NAME, "no value for extra processing separator" ); } } - if ( extra_processing1_sep != null && extra_processing1_sep.equals( separator ) ) { + if ( ( extra_processing1_sep != null ) && extra_processing1_sep.equals( separator ) ) { ForesterUtil.fatalError( PRG_NAME, "extra processing separator must not be the same the annotation-separator" ); } @@ -251,32 +251,54 @@ public final class cladinator { if ( outtablefile != null ) { System.out.println( "Output table : " + outtablefile ); } - Phylogeny p = null; + Phylogeny phys[] = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( intreefile, true ); - p = factory.create( intreefile, pp )[ 0 ]; + phys = factory.create( intreefile, pp ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "Could not read \"" + intreefile + "\" [" + e.getMessage() + "]" ); - System.exit( -1 ); } - System.out.println( "Ext. nodes in input tree : " + p.getNumberOfExternalNodes() ); - if ( map != null ) { - AnalysisMulti.performMapping( pattern, map, p, verbose ); + if ( phys.length == 0 ) { + ForesterUtil.fatalError( PRG_NAME, "\"" + intreefile + "\" does not contain any trees" ); } - if ( extra_processing1 ) { - AnalysisMulti.performExtraProcessing1( pattern, - p, - extra_processing1_sep, - extra_processing1_keep, - separator, - verbose ); - } - final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, cutoff_specifics ); - printResult( res ); + System.out.println( "Number of input trees : " + phys.length ); + if ( phys.length == 1 ) { + System.out.println( "Ext. nodes in input tree 1 : " + phys[ 0 ].getNumberOfExternalNodes() ); + } + else { + System.out.println( "Ext. nodes in input tree : " + phys[ 0 ].getNumberOfExternalNodes() ); + } + final EasyWriter outtable_writer; if ( outtablefile != null ) { - writeResultToTable( res, outtablefile ); + outtable_writer = ForesterUtil.createEasyWriter( outtablefile ); + } + else { + outtable_writer = null; + } + for( final Phylogeny phy : phys ) { + if ( map != null ) { + AnalysisMulti.performMapping( pattern, map, phy, verbose ); + } + if ( extra_processing1 ) { + AnalysisMulti.performExtraProcessing1( pattern, + phy, + extra_processing1_sep, + extra_processing1_keep, + separator, + verbose ); + } + final ResultMulti res = AnalysisMulti.execute( phy, pattern, separator, cutoff_specifics ); + printResult( res ); + if ( outtable_writer != null ) { + writeResultToTable( res, outtable_writer ); + outtable_writer.flush(); + } + } + if ( outtable_writer != null ) { + outtable_writer.flush(); + outtable_writer.close(); } } catch ( final UserException e ) { @@ -293,58 +315,61 @@ public final class cladinator { private final static void printResult( final ResultMulti res ) { System.out.println(); - System.out.println( "Result:" ); + System.out.println( "Result for " + res.getQueryNamePrefix() ); System.out.println(); if ( ( res.getAllMultiHitPrefixes() == null ) | ( res.getAllMultiHitPrefixes().size() < 1 ) ) { - System.out.println( "No match to query pattern!" ); + System.out.println( " No match to query pattern!" ); } else { - System.out.println( "Matching Clade(s):" ); + System.out.println( " Matching Clade(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { - System.out.println( prefix ); + System.out.println( " " + prefix ); } if ( res.isHasSpecificMultiHitsPrefixes() ) { System.out.println(); - System.out.println( "Specific-hit(s):" ); + System.out.println( " Specific-hit(s):" ); for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) { - System.out.println( prefix ); + System.out.println( " " + prefix ); } System.out.println(); - System.out.println( "Matching Clade(s) with Specific-hit(s):" ); + System.out.println( " Matching Clade(s) with Specific-hit(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { - System.out.println( prefix ); + System.out.println( " " + prefix ); for( final Prefix spec : res.getSpecificMultiHitPrefixes() ) { if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) { - System.out.println( " " + spec ); + System.out.println( " " + spec ); } } } } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) { System.out.println(); - System.out.println( "Matching Down-tree Bracketing Clade(s):" ); + System.out.println( " Matching Down-tree Bracketing Clade(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) { - System.out.println( prefix ); + System.out.println( " " + prefix ); } } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) { System.out.println(); - System.out.println( "Matching Up-tree Bracketing Clade(s):" ); + System.out.println( " Matching Up-tree Bracketing Clade(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) { - System.out.println( prefix ); + System.out.println( " " + prefix ); } } } System.out.println(); } - private final static void writeResultToTable( final ResultMulti res, final File outtablefile ) throws IOException { - final EasyWriter w = ForesterUtil.createEasyWriter( outtablefile ); + private final static void writeResultToTable( final ResultMulti res, final EasyWriter w ) throws IOException { if ( ( res.getAllMultiHitPrefixes() == null ) | ( res.getAllMultiHitPrefixes().size() < 1 ) ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.println( "No match to query pattern!" ); } else { for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.print( "Matching Clades" ); w.print( "\t" ); w.print( prefix.getPrefix() ); @@ -354,6 +379,8 @@ public final class cladinator { } if ( res.isHasSpecificMultiHitsPrefixes() ) { for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.print( "Specific-hits" ); w.print( "\t" ); w.print( prefix.getPrefix() ); @@ -364,6 +391,8 @@ public final class cladinator { } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) { for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.print( "Matching Down-tree Bracketing Clades" ); w.print( "\t" ); w.print( prefix.getPrefix() ); @@ -374,6 +403,8 @@ public final class cladinator { } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) { for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) { + w.print( res.getQueryNamePrefix() ); + w.print( "\t" ); w.print( "Matching Up-tree Bracketing Clades" ); w.print( "\t" ); w.print( prefix.getPrefix() ); @@ -383,34 +414,32 @@ public final class cladinator { } } } - w.flush(); - w.close(); } private final static void print_help() { System.out.println( "Usage:" ); System.out.println(); - System.out.println( PRG_NAME + " [options] [output table file]" ); + System.out.println( PRG_NAME + " [options] [output table file]" ); System.out.println(); System.out.println( " options:" ); System.out.println( " -" + SPECIFICS_CUTOFF_OPTION - + "= : the cutoff for \"specific-hit\" support values (default: " + + "= : the cutoff for \"specific-hit\" support values (default: " + SPECIFICS_CUTOFF_DEFAULT + ")" ); - System.out.println( " -" + SEP_OPTION + "= : the annotation-separator to be used (default: " + System.out.println( " -" + SEP_OPTION + "= : the annotation-separator to be used (default: " + SEP_DEFAULT + ")" ); System.out.println( " -" + MAPPING_FILE_OPTION - + "=: to map node names to appropriate annotations (tab-separated, two columns) (default: no mapping)" ); - System.out.println( " -" + QUERY_PATTERN_OPTION - + "=: the regular expression for the query (default: \"" + QUERY_PATTERN_DEFAULT - + "\" for pplacer output)" ); + + "= : to map node names to appropriate annotations (tab-separated, two columns) (default: no mapping)" ); System.out.println( " -" + EXTRA_PROCESSING_OPTION1 - + " : to enable extra processing of annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1\")" ); + + " : to enable extra processing of annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1\")" ); System.out.println( " -" + EXTRA_PROCESSING1_SEP_OPTION - + "= : the separator for extra annotations (default: \"" + EXTRA_PROCESSING1_SEP_DEFAULT + + "= : the separator for extra annotations (default: \"" + EXTRA_PROCESSING1_SEP_DEFAULT + "\")" ); System.out.println( " -" + EXTRA_PROCESSING1_KEEP_EXTRA_OPTION - + " : to keep extra annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1.Q16611\")" ); - System.out.println( " -" + VERBOSE_OPTION + " : verbose" ); + + " : to keep extra annotations (e.g. \"Q16611|A.1.1\" becomes \"A.1.1.Q16611\")" ); + System.out.println( " -" + VERBOSE_OPTION + " : verbose" ); + System.out.println( " --" + QUERY_PATTERN_OPTION + + "=: the regular expression for the query (default: \"" + QUERY_PATTERN_DEFAULT + + "\" for pplacer output)" ); System.out.println(); System.out.println( "Examples:" ); System.out.println(); @@ -419,6 +448,7 @@ public final class cladinator { System.out.println( " " + PRG_NAME + " -c=0.9 -s=_ -m=map.tsv my_tree.nh result.tsv" ); System.out.println( " " + PRG_NAME + " -x -xs=& -xk my_tree.nh result.tsv" ); System.out.println( " " + PRG_NAME + " -x -xs=\"|\" my_tree.nh result.tsv" ); + System.out.println( " " + PRG_NAME + " -x -xk -m=map.tsv pplacer_out_trees.sing.tre result.tsv" ); System.out.println(); } } diff --git a/forester/java/src/org/forester/clade_analysis/AnalysisMulti.java b/forester/java/src/org/forester/clade_analysis/AnalysisMulti.java index 2654b82..8eccc03 100644 --- a/forester/java/src/org/forester/clade_analysis/AnalysisMulti.java +++ b/forester/java/src/org/forester/clade_analysis/AnalysisMulti.java @@ -49,7 +49,7 @@ public final class AnalysisMulti { private final static String UNKNOWN = "?"; public final static double DEFAULT_CUTOFF_FOR_SPECIFICS = 0.5; public final static String DEFAULT_SEPARATOR = "."; - public final static Pattern DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE = Pattern.compile( ".+#\\d+_M=(.+)" ); + public final static Pattern DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE = Pattern.compile( "_#\\d+_M=(.+)" ); public static ResultMulti execute( final Phylogeny p ) throws UserException { return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, DEFAULT_CUTOFF_FOR_SPECIFICS ); @@ -75,7 +75,26 @@ public final class AnalysisMulti { throws UserException { cleanUpExternalNames( p, separator ); final List qnodes = p.getNodes( query ); + String query_name_prefix = null; + for( final PhylogenyNode n : qnodes ) { + final String name = n.getName(); + final Matcher matcher = query.matcher( name ); + if ( matcher.find() ) { + final String prefix = name.substring( 0, matcher.start() ); + if ( ForesterUtil.isEmpty( prefix ) ) { + throw new UserException( "query nodes with empty label prefix found: \"" + prefix + "\"" ); + } + if ( query_name_prefix == null ) { + query_name_prefix = prefix; + } + else if ( !query_name_prefix.equals( prefix ) ) { + throw new UserException( "query nodes with different label prefixes found: \"" + query_name_prefix + + "\" and \"" + prefix + "\"" ); + } + } + } final ResultMulti res = new ResultMulti(); + res.setQueryNamePrefix( query_name_prefix ); for( int i = 0; i < qnodes.size(); ++i ) { final PhylogenyNode qnode = qnodes.get( i ); if ( qnode.isRoot() ) { diff --git a/forester/java/src/org/forester/clade_analysis/Prefix.java b/forester/java/src/org/forester/clade_analysis/Prefix.java index 64a7b47..e4bf8fa 100644 --- a/forester/java/src/org/forester/clade_analysis/Prefix.java +++ b/forester/java/src/org/forester/clade_analysis/Prefix.java @@ -6,7 +6,7 @@ import java.text.DecimalFormat; public final class Prefix { - private final static DecimalFormat df = new DecimalFormat( "0.0#####" ); + private final static DecimalFormat df = new DecimalFormat( "0.0###" ); private final String _prefix; private final BigDecimal _confidence; private final String _separator; diff --git a/forester/java/src/org/forester/clade_analysis/ResultMulti.java b/forester/java/src/org/forester/clade_analysis/ResultMulti.java index e3b102e..566a646 100644 --- a/forester/java/src/org/forester/clade_analysis/ResultMulti.java +++ b/forester/java/src/org/forester/clade_analysis/ResultMulti.java @@ -56,6 +56,7 @@ public final class ResultMulti { private List _collapsed_down = null; private List _cleaned_spec_down = null; private boolean _has_specifics_down = false; + private String _query_name_prefix = ""; ResultMulti( final String separator ) { _separator = separator; @@ -115,9 +116,16 @@ public final class ResultMulti { return _has_specifics; } + public String getQueryNamePrefix() { + return _query_name_prefix; + } + @Override public final String toString() { final StringBuilder sb = new StringBuilder(); + sb.append( "Query: " ); + sb.append( getQueryNamePrefix() ); + sb.append( ForesterUtil.LINE_SEPARATOR ); sb.append( "Matching Clade(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed ) { @@ -179,6 +187,13 @@ public final class ResultMulti { _greatest_common_prefixes_down.add( new Prefix( prefix_down, confidence, _separator ) ); } + void setQueryNamePrefix( final String query_name_prefix ) { + if ( !ForesterUtil.isEmpty( _query_name_prefix ) ) { + throw new IllegalStateException( "illegal attempt to change the query name prefix" ); + } + _query_name_prefix = query_name_prefix; + } + final void analyze( final double cutoff_for_specifics ) throws UserException { reset(); analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff_for_specifics ); -- 1.7.10.2