import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
import org.forester.clade_analysis.AnalysisMulti;
import org.forester.clade_analysis.Prefix;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.util.CommandLineArguments;
+import org.forester.util.EasyWriter;
import org.forester.util.ForesterUtil;
public final class cladinator {
- final static private String PRG_NAME = "cladinator";
- final static private String PRG_VERSION = "0.100";
- final static private String PRG_DATE = "170823";
- final static private String PRG_DESC = "clades within clades -- analysis of pplacer type outputs";
- final static private String E_MAIL = "phyloxml@gmail.com";
- final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
- final static private String HELP_OPTION_1 = "help";
- final static private String HELP_OPTION_2 = "h";
- final static private String SEP_OPTION = "s";
- private final static DecimalFormat df2 = new DecimalFormat( "0.0#" );
+ final static private String PRG_NAME = "cladinator";
+ final static private String PRG_VERSION = "1.00";
+ final static private String PRG_DATE = "170902";
+ final static private String PRG_DESC = "clades within clades of annotated labels -- analysis of pplacer-type outputs";
+ final static private String E_MAIL = "phyloxml@gmail.com";
+ final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
+ final static private String HELP_OPTION_1 = "help";
+ final static private String HELP_OPTION_2 = "h";
+ final static private String SEP_OPTION = "s";
+ final static private String QUERY_PATTERN_OPTION = "q"; // command-line option carrying a user-supplied query regular expression
+ final static private String SPECIFICS_CUTOFF_OPTION = "c"; // command-line option carrying the specific-hit support cutoff
+ final static private double SPECIFICS_CUTOFF_DEFAULT = 0.8; // cutoff used when the cutoff option is not set
+ final static private String SEP_DEFAULT = "."; // annotation-separator used when the separator option is not set
+ final static private Pattern QUERY_PATTERN_DEFAULT = AnalysisMulti.DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE; // query pattern used when no query-pattern option is given
+ private final static DecimalFormat df = new DecimalFormat( "0.0#######" ); // formats Prefix confidence values written to the output table
public static void main( final String args[] ) {
try {
print_help();
System.exit( 0 );
}
- else if ( ( ( args.length != 2 ) && ( args.length != 3 ) ) ) {
- System.out.println();
- System.out.println( "Wrong number of arguments." );
- System.out.println();
+ if ( ( cla.getNumberOfNames() != 1 ) && ( cla.getNumberOfNames() != 2 ) ) {
print_help();
System.exit( -1 );
}
final List<String> allowed_options = new ArrayList<>();
allowed_options.add( SEP_OPTION );
+ allowed_options.add( QUERY_PATTERN_OPTION );
+ allowed_options.add( SPECIFICS_CUTOFF_OPTION );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
}
- final String separator;
+ double cutoff_specifics = SPECIFICS_CUTOFF_DEFAULT;
+ if ( cla.isOptionSet( SPECIFICS_CUTOFF_OPTION ) ) {
+ if ( cla.isOptionValueSet( SPECIFICS_CUTOFF_OPTION ) ) {
+ cutoff_specifics = cla.getOptionValueAsDouble( SPECIFICS_CUTOFF_OPTION );
+ if ( cutoff_specifics < 0 ) {
+ ForesterUtil.fatalError( PRG_NAME, "cutoff cannot be negative" );
+ }
+ }
+ else {
+ ForesterUtil.fatalError( PRG_NAME, "no value for cutoff for specifics" );
+ }
+ }
+ String separator = SEP_DEFAULT;
if ( cla.isOptionSet( SEP_OPTION ) ) {
- separator = cla.getOptionValue( SEP_OPTION );
+ if ( cla.isOptionValueSet( SEP_OPTION ) ) {
+ separator = cla.getOptionValue( SEP_OPTION );
+ }
+ else {
+ ForesterUtil.fatalError( PRG_NAME, "no value for separator option" );
+ }
}
- else {
- separator = null;
+ Pattern compiled_query_str = null;
+ if ( cla.isOptionSet( QUERY_PATTERN_OPTION ) ) {
+ if ( cla.isOptionValueSet( QUERY_PATTERN_OPTION ) ) {
+ final String query_str = cla.getOptionValue( QUERY_PATTERN_OPTION );
+ try {
+ compiled_query_str = Pattern.compile( query_str );
+ }
+ catch ( final PatternSyntaxException e ) {
+ ForesterUtil.fatalError( PRG_NAME, "error in regular expression: " + e.getMessage() );
+ }
+ }
+ else {
+ ForesterUtil.fatalError( PRG_NAME, "no value for query pattern option" );
+ }
}
+ final Pattern pattern = ( compiled_query_str != null ) ? compiled_query_str : QUERY_PATTERN_DEFAULT;
final File intreefile = cla.getFile( 0 );
- final String query = cla.getName( 1 );
- System.out.println( "Input tree: " + intreefile );
- System.out.println( "Query : " + query );
- if ( !ForesterUtil.isEmpty( separator ) ) {
- System.out.println( "Separator : " + separator );
+ final String error_intreefile = ForesterUtil.isReadableFile( intreefile );
+ if ( !ForesterUtil.isEmpty( error_intreefile ) ) {
+ ForesterUtil.fatalError( PRG_NAME, error_intreefile );
+ }
+ final File outtablefile;
+ if ( cla.getNumberOfNames() > 1 ) {
+ outtablefile = cla.getFile( 1 );
+ final String error_outtablefile = ForesterUtil.isWritableFile( outtablefile );
+ if ( !ForesterUtil.isEmpty( error_outtablefile ) ) {
+ ForesterUtil.fatalError( PRG_NAME, error_outtablefile );
+ }
}
else {
- System.out.println( "Separator : none" );
+ outtablefile = null;
+ }
+ System.out.println( "Input tree : " + intreefile );
+ System.out.println( "Specific-hit support cutoff: " + cutoff_specifics );
+ System.out.println( "Annotation-separator : " + separator );
+ System.out.println( "Query pattern : " + pattern );
+ if ( outtablefile != null ) {
+ System.out.println( "Output table : " + outtablefile );
}
Phylogeny p = null;
try {
p = factory.create( intreefile, pp )[ 0 ];
}
catch ( final IOException e ) {
- System.out.println( "\nCould not read \"" + intreefile + "\" [" + e.getMessage() + "]\n" );
+ ForesterUtil.fatalError( PRG_NAME, "Could not read \"" + intreefile + "\" [" + e.getMessage() + "]" );
System.exit( -1 );
}
- final Pattern pattern = Pattern.compile( query );
- final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, 0.5 );
- System.out.println();
- System.out.println( "Result:" );
- System.out.println( "Query : " + query );
- ///////////////////
- System.out.println( "Collapsed:" );
+ System.out.println( "Ext. nodes in input tree : " + p.getNumberOfExternalNodes() );
+ final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, cutoff_specifics );
+ printResult( res );
+ if ( outtablefile != null ) {
+ writeResultToTable( res, outtablefile );
+ }
+ }
+ // Expected failures (bad arguments, I/O problems): report the message only.
+ catch ( final IllegalArgumentException | IOException e ) {
+ ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+ }
+ // Anything else is unexpected: keep the stack trace for diagnosis before reporting.
+ catch ( final Exception e ) {
+ e.printStackTrace();
+ ForesterUtil.fatalError( PRG_NAME, "Unexpected error!" );
+ }
+ }
+
+ /**
+ * Prints a human-readable summary of the analysis result to standard output.
+ * <p>
+ * Reports "No match to query pattern!" when the result holds no multi-hit
+ * prefixes (null or empty list); otherwise lists the matching clades followed,
+ * when present, by specific hits and the down-tree and up-tree bracketing clades.
+ *
+ * @param res the result to print
+ */
+ private final static void printResult( final ResultMulti res ) {
+ System.out.println();
+ System.out.println( "Result:" );
+ System.out.println();
+ // Short-circuit "||" so that size() is never called on a null list.
+ if ( ( res.getAllMultiHitPrefixes() == null ) || ( res.getAllMultiHitPrefixes().size() < 1 ) ) {
+ System.out.println( "No match to query pattern!" );
+ }
+ else {
+ System.out.println( "Matching Clade(s):" );
for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
System.out.println( prefix );
}
if ( res.isHasSpecificMultiHitsPrefixes() ) {
- System.out.println( "Specifics:" );
+ System.out.println();
+ System.out.println( "Specific-hit(s):" );
for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) {
System.out.println( prefix );
}
- System.out.println( "Collapsed With Specifics:" );
+ System.out.println();
+ System.out.println( "Matching Clade(s) with Specific-hit(s):" );
for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
System.out.println( prefix );
for( final Prefix spec : res.getSpecificMultiHitPrefixes() ) {
}
}
if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) {
- System.out.println( "Collapsed Down:" );
+ System.out.println();
+ System.out.println( "Matching Down-tree Bracketing Clade(s):" );
for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) {
System.out.println( prefix );
}
}
if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) {
- System.out.println( "Collapsed Up:" );
- for( final Prefix prefix : res.getAllMultiHitPrefixesUp() ) {
+ System.out.println();
+ System.out.println( "Matching Up-tree Bracketing Clade(s):" );
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) {
System.out.println( prefix );
}
}
- ///////////////////
- System.out.println();
}
- catch ( final IllegalArgumentException e ) {
- ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+ System.out.println();
+ }
+
+ /**
+ * Writes the analysis result to the given file as tab-separated rows of
+ * category, prefix and formatted confidence. A single
+ * "No match to query pattern!" line is written when the result holds no
+ * multi-hit prefixes (null or empty list).
+ *
+ * @param res the result to write
+ * @param outtablefile the file the table is written to
+ * @throws IOException propagated from creating, writing to, flushing or closing the writer
+ */
+ private final static void writeResultToTable( final ResultMulti res, final File outtablefile ) throws IOException {
+ final EasyWriter w = ForesterUtil.createEasyWriter( outtablefile );
+ // try/finally so the writer is closed even if a write fails part-way.
+ try {
+ // Short-circuit "||" so that size() is never called on a null list.
+ if ( ( res.getAllMultiHitPrefixes() == null ) || ( res.getAllMultiHitPrefixes().size() < 1 ) ) {
+ w.println( "No match to query pattern!" );
}
- catch ( final Exception e ) {
- e.printStackTrace();
- ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
+ else {
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
+ w.print( "Matching Clades" );
+ w.print( "\t" );
+ w.print( prefix.getPrefix() );
+ w.print( "\t" );
+ w.print( df.format( prefix.getConfidence() ) );
+ w.println();
+ }
+ if ( res.isHasSpecificMultiHitsPrefixes() ) {
+ for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) {
+ w.print( "Specific-hits" );
+ w.print( "\t" );
+ w.print( prefix.getPrefix() );
+ w.print( "\t" );
+ w.print( df.format( prefix.getConfidence() ) );
+ w.println();
+ }
+ }
+ if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) {
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) {
+ w.print( "Matching Down-tree Bracketing Clades" );
+ w.print( "\t" );
+ w.print( prefix.getPrefix() );
+ w.print( "\t" );
+ w.print( df.format( prefix.getConfidence() ) );
+ w.println();
+ }
+ }
+ if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) {
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) {
+ w.print( "Matching Up-tree Bracketing Clades" );
+ w.print( "\t" );
+ w.print( prefix.getPrefix() );
+ w.print( "\t" );
+ w.print( df.format( prefix.getConfidence() ) );
+ w.println();
+ }
+ }
}
+ w.flush();
+ }
+ finally {
+ w.close();
+ }
}
private final static void print_help() {
System.out.println( "Usage:" );
System.out.println();
- System.out.println( PRG_NAME + " [options] <gene tree file> <query>" );
+ System.out.println( PRG_NAME + " [options] <input tree file> [output table file]" ); // the output table file is optional; main accepts one or two names
System.out.println();
System.out.println( " options:" );
- System.out.println( " -" + SEP_OPTION + "=<separator>: the separator to be used" );
+ System.out.println( " -" + SPECIFICS_CUTOFF_OPTION
+ + "=<double>: the cutoff for \"specific-hit\" support values (default: " + SPECIFICS_CUTOFF_DEFAULT
+ + ")" );
+ System.out.println( " -" + SEP_OPTION + "=<separator>: the annotation-separator to be used (default: "
+ + SEP_DEFAULT + ")" );
+ System.out.println( " -" + QUERY_PATTERN_OPTION
+ + "=<query pattern>: the regular expression for the query (default: \"" + QUERY_PATTERN_DEFAULT
+ + "\" for pplacer output)" ); // the default shown is the Pattern's string form
System.out.println();
System.out.println( "Example:" );
System.out.println();
- System.out.println( " " + PRG_NAME + " -s=. my_tree.xml A.1.1.1" );
+ System.out.println( " " + PRG_NAME + " -c=0.5 -s=. my_tree.nh result.tsv" ); // example sets cutoff and separator and names an output table
System.out.println();
}
}
import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
-public class serin {
+public class rid {
- final static private String PRG_NAME = "serin";
- final static private String PRG_DATE = "170830";
+ final static private String PRG_NAME = "rid";
+ final static private String PRG_DATE = "170902";
final static private String PRG_DESC = "sequence file reformatting and identifier normalization";
final static private String PRG_VERSION = "1.00";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
final static private String E_MAIL = "phyloxml@gmail.com";
final static private String OUTPUT_FORMAT_OPTION = "o";
- final static private String ID_NORM_OPTION = "i";
+ final static private String ID_NORM_OPTION = "s";
final static private String HELP_OPTION_1 = "help";
final static private String HELP_OPTION_2 = "h";
private static final String OUTPUT_FORMAT_FASTA = "f";
output_format = MSA_FORMAT.NEXUS;
}
else {
- ForesterUtil.fatalError( PRG_NAME, "unknown format option: " + output_format_str );
+ ForesterUtil.fatalError( PRG_NAME, "unknown output format option: " + output_format_str );
}
}
+ else {
+ ForesterUtil.fatalError( PRG_NAME, "no value for output format option" );
+ }
}
final boolean normalize_identifiers;
if ( cla.isOptionSet( ID_NORM_OPTION ) || ( cla.getNumberOfNames() == 3 ) ) {
- System.out.println( "Sequence lenght min : " + ( int ) stats.getMin() );
- System.out.println( "Sequence lenght max : " + ( int ) stats.getMax() );
+ System.out.println( "Sequence length min : " + ( int ) stats.getMin() );
+ System.out.println( "Sequence length max : " + ( int ) stats.getMax() );
+ // The median is only reported when there are more than two input sequences.
if ( input_seqs.size() > 2 ) {
- System.out.println( "Sequence lenght median: " + ( int ) stats.median() );
+ System.out.println( "Sequence length median: " + ( int ) stats.median() );
}
if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) {
ForesterUtil.fatalError( PRG_NAME,
seq.getMolecularSequenceAsString() );
output_seqs.add( ns );
}
+ System.out.println();
if ( normalize_identifiers ) {
output_map_writer.flush();
output_map_writer.close();
- System.out.println();
System.out.println( "Wrote : " + output_map_file );
}
final BufferedWriter seq_writer = ForesterUtil.createBufferedWriter( outfile_seqs_file );
+ OUTPUT_FORMAT_FASTA + " for Fasta (default), " + OUTPUT_FORMAT_PHYLIP_L + " or "
+ OUTPUT_FORMAT_PHYLIP + " for Phylip, " + OUTPUT_FORMAT_NEXUS_L + " or " + OUTPUT_FORMAT_NEXUS
+ " for Nexus" );
- System.out.println( " -" + ID_NORM_OPTION + ": to replace sequence names with short(er) identifiers" );
+ System.out.println( " -" + ID_NORM_OPTION + " : to replace sequence names with short(er) identifiers" );
System.out.println();
System.out.println( "Example:" );
System.out.println();
- System.out.println( " " + PRG_NAME + " -i -o=p my_seqs.fasta" );
+ System.out.println( " " + PRG_NAME + " -s -o=p my_seqs.fasta" );
System.out.println();
}
}
public final class ResultMulti {
private final String _separator;
- private final List<Prefix> _greatest_common_prefixes = new ArrayList<Prefix>();
- private final List<Prefix> _greatest_common_prefixes_up = new ArrayList<Prefix>();
- private final List<Prefix> _greatest_common_prefixes_down = new ArrayList<Prefix>();
+ private final List<Prefix> _greatest_common_prefixes = new ArrayList<>();
+ private final List<Prefix> _greatest_common_prefixes_up = new ArrayList<>();
+ private final List<Prefix> _greatest_common_prefixes_down = new ArrayList<>();
private List<Prefix> _all = null;
private List<Prefix> _collapsed = null;
private List<Prefix> _cleaned_spec = null;
@Override
public final String toString() {
final StringBuilder sb = new StringBuilder();
- // sb.append( "Cleaned:" );
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- // for( final Prefix prefix : _all ) {
- // sb.append( prefix );
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- // }
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Collapsed:" );
+ sb.append( "Matching Clade(s):" );
sb.append( ForesterUtil.LINE_SEPARATOR );
for( final Prefix prefix : _collapsed ) {
sb.append( prefix );
}
if ( _has_specifics ) {
sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Specifics:" );
+ sb.append( "Specific-hit(s):" );
sb.append( ForesterUtil.LINE_SEPARATOR );
for( final Prefix prefix : _cleaned_spec ) {
sb.append( prefix );
sb.append( ForesterUtil.LINE_SEPARATOR );
}
sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Collapsed With Specifics:" );
+ sb.append( "Matching Clade(s) with Specific-hit(s):" );
sb.append( ForesterUtil.LINE_SEPARATOR );
for( final Prefix prefix : _collapsed ) {
sb.append( prefix );
}
if ( !ForesterUtil.isEmpty( _all_down ) ) {
sb.append( ForesterUtil.LINE_SEPARATOR );
- // sb.append( "Cleaned Down:" );
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- // for( final Prefix prefix : _all_down ) {
- // sb.append( prefix );
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- // }
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Collapsed Down:" );
+ sb.append( "Matching Down-tree Bracketing Clade(s):" );
sb.append( ForesterUtil.LINE_SEPARATOR );
for( final Prefix prefix : _collapsed_down ) {
sb.append( prefix );
sb.append( ForesterUtil.LINE_SEPARATOR );
}
- /* if ( _has_specifics_down ) {
- sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Specifics Down:" );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- for( final Prefix prefix : _cleaned_spec_down ) {
- sb.append( prefix );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- }
- sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Collapsed With Specifics Down:" );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- for( final Prefix prefix : _collapsed_down ) {
- sb.append( prefix );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- for( final Prefix spec : _cleaned_spec_down ) {
- if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
- sb.append( " " + spec );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- }
- }
- }
- }*/
}
if ( !ForesterUtil.isEmpty( _all_up ) ) {
sb.append( ForesterUtil.LINE_SEPARATOR );
- // sb.append( "Cleaned Up:" );
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- // for( final Prefix prefix : _all_up ) {
- // sb.append( prefix );
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- // }
- // sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Collapsed Up:" );
+ sb.append( "Matching Up-tree Bracketing Clade(s):" );
sb.append( ForesterUtil.LINE_SEPARATOR );
for( final Prefix prefix : _collapsed_up ) {
sb.append( prefix );
sb.append( ForesterUtil.LINE_SEPARATOR );
}
- /* if ( _has_specifics ) {
- sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Specifics Up:" );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- for( final Prefix prefix : _cleaned_spec_up ) {
- sb.append( prefix );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- }
- sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( "Collapsed With Specifics Up:" );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- for( final Prefix prefix : _collapsed_up ) {
- sb.append( prefix );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- for( final Prefix spec : _cleaned_spec_up ) {
- if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
- sb.append( " " + spec );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- }
- }
- }
- }*/
}
return sb.toString();
}
}
private final void reset() {
- _all = new ArrayList<Prefix>();
- _collapsed = new ArrayList<Prefix>();
- _cleaned_spec = new ArrayList<Prefix>();
+ _all = new ArrayList<>(); // diamond: element type comes from the List<Prefix> field declaration
+ _collapsed = new ArrayList<>(); // list that toString() prints under "Matching Clade(s):"
+ _cleaned_spec = new ArrayList<>(); // list that toString() prints under "Specific-hit(s):"
_has_specifics = false;
- _all_up = new ArrayList<Prefix>();
- _collapsed_up = new ArrayList<Prefix>();
- _cleaned_spec_up = new ArrayList<Prefix>();
+ _all_up = new ArrayList<>();
+ _collapsed_up = new ArrayList<>(); // list that toString() prints under "Matching Up-tree Bracketing Clade(s):"
+ _cleaned_spec_up = new ArrayList<>();
_has_specifics_up = false;
- _all_down = new ArrayList<Prefix>();
- _collapsed_down = new ArrayList<Prefix>();
- _cleaned_spec_down = new ArrayList<Prefix>();
+ _all_down = new ArrayList<>();
+ _collapsed_down = new ArrayList<>(); // list that toString() prints under "Matching Down-tree Bracketing Clade(s):"
+ _cleaned_spec_down = new ArrayList<>();
_has_specifics_down = false;
}