package org.forester.application;
import java.io.File;
+import java.io.IOException;
import java.text.DecimalFormat;
+import java.util.ArrayList;
+import java.util.List;
import org.forester.clade_analysis.Analysis;
import org.forester.clade_analysis.Result;
public final class cladinator {
final static private String PRG_NAME = "cladinator";
- final static private String PRG_VERSION = "0.100";
- final static private String PRG_DATE = "170721";
+ final static private String PRG_VERSION = "0.101";
+ final static private String PRG_DATE = "170810";
final static private String PRG_DESC = "clades within clades -- analysis of pplacer type outputs";
final static private String E_MAIL = "phyloxml@gmail.com";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
final static private String HELP_OPTION_1 = "help";
final static private String HELP_OPTION_2 = "h";
+ final static private String SEP_OPTION = "s";
private final static DecimalFormat df2 = new DecimalFormat( ".##" );
public static void main( final String args[] ) {
print_help();
System.exit( 0 );
}
- else if ( ( args.length != 2 ) ) {
+ else if ( ( args.length != 2 && args.length != 3 ) ) {
System.out.println();
System.out.println( "Wrong number of arguments." );
System.out.println();
print_help();
System.exit( -1 );
}
- //final List<String> allowed_options = new ArrayList<>();
+ final List<String> allowed_options = new ArrayList<String>();
+ allowed_options.add( SEP_OPTION );
+ final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
+ if ( dissallowed_options.length() > 0 ) {
+ ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
+ }
+ final String separator;
+ if ( cla.isOptionSet( SEP_OPTION ) ) {
+ separator = cla.getOptionValue( SEP_OPTION );
+ }
+ else {
+ separator = null;
+ }
final File intreefile = cla.getFile( 0 );
final String query = cla.getName( 1 );
System.out.println( "Input tree: " + intreefile );
- System.out.println( "Query: " + query );
+ System.out.println( "Query : " + query );
+ if ( !ForesterUtil.isEmpty( separator ) ) {
+ System.out.println( "Separator : " + separator );
+ }
+ else {
+ System.out.println( "Separator : none" );
+ }
Phylogeny p = null;
try {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( intreefile, true );
p = factory.create( intreefile, pp )[ 0 ];
}
- catch ( final Exception e ) {
+ catch ( final IOException e ) {
System.out.println( "\nCould not read \"" + intreefile + "\" [" + e.getMessage() + "]\n" );
System.exit( -1 );
}
- final Result res = Analysis.execute( p, query );
+ final Result res = Analysis.execute( p, query, separator );
System.out.println();
System.out.println( "Result:" );
- System.out.println( "Greatest common prefix : " + res.getGreatestCommonPrefix() );
- System.out.println( "Greatest common prefix up : " + res.getGreatestCommonPrefixUp() );
- System.out.println( "Greatest common prefix down: " + res.getGreatestCommonPrefixDown() );
+ System.out.println( "Greatest Common Prefix : " + res.getGreatestCommonPrefix() );
+ System.out.println( "Greatest Common Prefix Up : " + res.getGreatestCommonPrefixUp() );
+ System.out.println( "Greatest Common Prefix Down : " + res.getGreatestCommonPrefixDown() );
+
+ if ( !ForesterUtil.isEmpty( res.getGreatestCommonCladeConfidence() ) ) {
+ System.out.println( "Greatest Common Clade Conf : " + res.getGreatestCommonCladeConfidence() );
+ }
+ if ( !ForesterUtil.isEmpty( res.getGreatestCommonCladeUpConfidence() ) ) {
+ System.out.println( "Greatest Common Clade Up Conf: " + res.getGreatestCommonCladeUpConfidence() );
+ }
+ if ( !ForesterUtil.isEmpty( res.getGreatestCommonCladeDownConfidence() ) ) {
+ System.out.println( "Greatest Common Clade Down Conf: " + res.getGreatestCommonCladeDownConfidence() );
+ }
+
+ System.out.println( "Least Encompassing Clade size: " + res.getLeastEncompassingCladeSize()
+ + " external nodes" );
final double lec_ratio = ( 100.0 * res.getLeastEncompassingCladeSize() ) / res.getTreeSize();
- System.out.println( "Least Encompassing Clade has " + res.getLeastEncompassingCladeSize()
- + " external nodes (" + df2.format( lec_ratio ) + "% of a total of " + res.getTreeSize() + ")" );
+ System.out.println( "Least Encompassing Clade size: " + df2.format( lec_ratio ) + "%" );
+ System.out.println( "Total tree size : " + res.getTreeSize() + " external nodes" );
if ( res.getWarnings().size() > 0 ) {
System.out.println( "Warnings:" );
for( final String s : res.getWarnings() ) {
System.out.println( s );
}
}
+ System.out.println();
}
- catch ( final Exception e ) {
+ catch ( final IllegalArgumentException e ) {
ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
}
+ catch ( final Exception e ) {
+ e.printStackTrace();
+ ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
+ }
}
/**
 * Prints the command-line usage of this program to standard output:
 * the invocation form, the separator option, and one example call.
 */
private final static void print_help() {
- System.out.println( "Usage: " + PRG_NAME + " <gene tree file> <query>" );
+ System.out.println( "Usage:" );
+ System.out.println();
+ System.out.println( PRG_NAME + " [options] <gene tree file> <query>" );
+ System.out.println();
+ System.out.println( " options:" );
+ System.out.println( " -" + SEP_OPTION + "=<separator>: the separator to be used" );
+ System.out.println();
+ System.out.println( "Example:" );
+ System.out.println();
+ System.out.println( " " + PRG_NAME + " -s=. my_tree.xml A.1.1.1" );
System.out.println();
}
}
// * Multiple "hits" with different "M" values
// * More tests (including multiple children per node), especially on edge cases
// * Utilize relevant support values for warnings
-// * Better system for "clade label creation" (e.g. 1.3.4 + 1.3.6 -> 1.3), use
-// specific separator (eg . | _ )
package org.forester.clade_analysis;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Confidence;
import org.forester.util.ForesterUtil;
public final class Analysis {
- public static Result execute( final Phylogeny p, final String query ) {
+ public static Result execute( final Phylogeny p, final String query, final String separator ) {
final PhylogenyNode qnode = p.getNode( query );
if ( qnode.isRoot() ) {
throw new IllegalStateException( "Unexpected error: Query " + query
throw new IllegalStateException( "Unexpected error: Parent of query " + query
+ " is root. This should have never happened" );
}
- final PhylogenyNode qnode_p = qnode.getParent();
- final PhylogenyNode qnode_pp = qnode.getParent().getParent();
+ PhylogenyNode qnode_p = qnode.getParent();
+ PhylogenyNode qnode_pp = qnode.getParent().getParent();
+ while ( qnode_p.getNumberOfDescendants() == 1 ) {
+ qnode_p = qnode_p.getParent();
+ }
+ while ( qnode_pp.getNumberOfDescendants() == 1 ) {
+ qnode_pp = qnode_pp.getParent();
+ }
final List<PhylogenyNode> qnode_ext_nodes = qnode_pp.getAllExternalDescendants();
final int lec_ext_nodes = qnode_ext_nodes.size() - 1;
final int p_ext_nodes = p.getNumberOfExternalNodes() - 1;
qnode_ext_nodes_names.add( name );
}
}
- final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names );
+ final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
final Result res = new Result();
if ( greatest_common_prefix.length() < 1 ) {
res.addWarning( "No greatest common prefix" );
}
res.setLeastEncompassingCladeSize( lec_ext_nodes );
res.setTreeSize( p_ext_nodes );
- final String greatest_common_prefix_a = analyzeSiblings( qnode_p, qnode_pp );
+ if ( qnode_pp.getBranchData().getConfidences() != null
+ && qnode_pp.getBranchData().getConfidences().size() > 0 ) {
+ final Confidence conf = qnode_pp.getBranchData().getConfidence( 0 );
+ if ( conf != null ) {
+ res.setGreatestCommonCladeConfidence( conf.getValue()
+ + ( ForesterUtil.isEmpty( conf.getType() ) ? "" : " [" + conf.getType() + "]" ) );
+ }
+ }
+ final String greatest_common_prefix_a = analyzeSiblings( qnode_p, qnode_pp, separator );
res.setGreatestCommonPrefixUp( greatest_common_prefix_a );
- final String greatest_common_prefix_b = analyzeSiblings( qnode, qnode_p );
+ final String greatest_common_prefix_b = analyzeSiblings( qnode, qnode_p, separator );
res.setGreatestCommonPrefixDown( greatest_common_prefix_b );
return res;
}
- private final static String analyzeSiblings( final PhylogenyNode child, final PhylogenyNode parent ) {
+ private final static String analyzeSiblings( final PhylogenyNode child,
+ final PhylogenyNode parent,
+ final String separator ) {
final int child_index = child.getChildNodeIndex();
final List<String> ext_nodes_names = new ArrayList<>();
final List<PhylogenyNode> descs = parent.getDescendants();
}
}
}
- final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names );
+ final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
return greatest_common_prefix;
}
}
public final class Result {
- private String _greatest_common_prefix = "";
- private String _greatest_common_prefix_up = "";
- private String _greatest_common_prefix_down = "";
- private final List<String> _warnings = new ArrayList<>();
- private int _lec_ext_nodes = 0;
- private int _p_ext_nodes = 0;
+ private String _greatest_common_prefix = "";
+ private String _greatest_common_prefix_up = "";
+ private String _greatest_common_prefix_down = "";
+ private final List<String> _warnings = new ArrayList<>();
+ private int _lec_ext_nodes = 0;
+ private int _p_ext_nodes = 0;
+ private String _greatest_common_clade_confidence = "";
+ private String _greatest_common_clade_confidence_up = "";
+ private String _greatest_common_clade_confidence_down = "";
void addWarning( final String warning ) {
_warnings.add( warning );
_greatest_common_prefix_down = greatest_common_prefix_down;
}
+    /**
+     * Stores the given text in the greatest-common-clade confidence field.
+     *
+     * @param greatest_common_clade_confidence the text to store
+     */
+    void setGreatestCommonCladeConfidence( final String greatest_common_clade_confidence ) {
+        this._greatest_common_clade_confidence = greatest_common_clade_confidence;
+    }
+
+    /**
+     * Stores the given text in the "up" greatest-common-clade confidence field.
+     *
+     * @param greatest_common_clade_confidence_up the text to store
+     */
+    void setGreatestCommonCladeUpConfidence( final String greatest_common_clade_confidence_up ) {
+        this._greatest_common_clade_confidence_up = greatest_common_clade_confidence_up;
+    }
+
+    /**
+     * Stores the given text in the "down" greatest-common-clade confidence field.
+     *
+     * @param greatest_common_clade_confidence_down the text to store
+     */
+    void setGreatestCommonCladeDownConfidence( final String greatest_common_clade_confidence_down ) {
+        this._greatest_common_clade_confidence_down = greatest_common_clade_confidence_down;
+    }
+
/**
 * Returns the stored greatest common prefix.
 *
 * @return the current value of the greatest-common-prefix field
 */
public String getGreatestCommonPrefix() {
    return this._greatest_common_prefix;
}
return _greatest_common_prefix_down;
}
+    /**
+     * Returns the stored greatest-common-clade confidence text.
+     *
+     * @return the current field value; the field is initialized to the empty string
+     */
+    public String getGreatestCommonCladeConfidence() {
+        return this._greatest_common_clade_confidence;
+    }
+
+    /**
+     * Returns the stored "up" greatest-common-clade confidence text.
+     *
+     * @return the current field value; the field is initialized to the empty string
+     */
+    public String getGreatestCommonCladeUpConfidence() {
+        return this._greatest_common_clade_confidence_up;
+    }
+
+    /**
+     * Returns the stored "down" greatest-common-clade confidence text.
+     *
+     * @return the current field value; the field is initialized to the empty string
+     */
+    public String getGreatestCommonCladeDownConfidence() {
+        return this._greatest_common_clade_confidence_down;
+    }
+
/**
 * Returns the warnings collected so far.
 *
 * @return the internal warnings list itself (not a copy)
 */
public List<String> getWarnings() {
    return this._warnings;
}
import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.TreePanelUtil;
import org.forester.archaeopteryx.webservices.WebserviceUtil;
+import org.forester.clade_analysis.CladeAnalysisTest;
import org.forester.development.DevelopmentTools;
import org.forester.evoinference.TestPhylogenyReconstruction;
import org.forester.evoinference.matrix.character.CharacterStateMatrix;
}
System.out.println( "OK." );
+        // Run the separator-aware common-prefix test and report exactly one outcome.
+        // "OK." must only be printed on success; printing it unconditionally after the
+        // if/else would emit "failed." followed by "OK." for a failing test.
+        System.out.print( "Common prefix sep: " );
+        if ( testCommonPrefixSep() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
+
+
System.out.print( "Sequence writer: " );
if ( testSequenceWriter() ) {
System.out.println( "OK." );
System.out.println( "failed." );
failed++;
}
+ System.out.print( "Clade analyis: " );
+ if ( CladeAnalysisTest.test() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
System.out.print( "Phylogeny reconstruction:" );
System.out.println();
if ( TestPhylogenyReconstruction.test( new File( PATH_TO_TEST_DATA ) ) ) {
}
return true;
}
+
+ private static boolean testCommonPrefixSep() {
+ final List<String> l0 = new ArrayList<String>();
+ l0.add( "a.b.c" );
+ if ( !ForesterUtil.greatestCommonPrefix( l0, ".").equals( "a.b.c" ) ) {
+ return false;
+ }
+
+ final List<String> l1 = new ArrayList<String>();
+ l1.add( "a.b.c" );
+ l1.add( "a.b.X" );
+ if ( !ForesterUtil.greatestCommonPrefix( l1 , ".").equals( "a.b" ) ) {
+ return false;
+ }
+
+ final List<String> l2 = new ArrayList<String>();
+ l2.add( "a.b.c." );
+ l2.add( "a.b.X." );
+ l2.add( "a.x.y." );
+ if ( !ForesterUtil.greatestCommonPrefix( l2, ".").equals( "a" ) ) {
+ return false;
+ }
+
+ final List<String> l3 = new ArrayList<String>();
+ l3.add( "a/b/X/s/d/f/s/d/f/s/d/f/s/d/f/s/d/f/s/d/" );
+ l3.add( "a/b/X/s/d/f/s/d/f/s/d/f/s/d/f/s/d/f/s/d" );
+ l3.add( "a/b/c" );
+ l3.add( "a/b/X/s/d/f/s/d/f/s/d/f/s/d/f/s/d/f/s/d/" );
+ l3.add( "a/b/" );
+ l3.add( "a/b/c/" );
+ l3.add( "a/b////////" );
+ if ( !ForesterUtil.greatestCommonPrefix( l3, "/" ).equals( "a/b" ) ) {
+ return false;
+ }
+
+ final List<String> l4 = new ArrayList<String>();
+ l4.add( "a.b.X.s.d.f.s.d.f.s.d.f.s.d.f.s.d.f.s.d" );
+ l4.add( "a.b.X.s.d.f.s.d.f.s.d.f.s.d.f.s.d.f.s.d" );
+ l4.add( "a.b.c" );
+ l4.add( "X.s.d.f.s.d.f.s.d.f.s.d.f.s.d.f.s.d..." );
+ l4.add( "a.b" );
+ l4.add( "a.b.c" );
+ if ( !ForesterUtil.greatestCommonPrefix( l4, "." ).equals( "" ) ) {
+ return false;
+ }
+
+ final List<String> l5 = new ArrayList<String>();
+ l5.add( "" );
+ if ( !ForesterUtil.greatestCommonPrefix( l5, "_" ).equals( "" ) ) {
+ return false;
+ }
+
+ final List<String> l6 = new ArrayList<String>();
+ l6.add( "_" );
+ l6.add( "__" );
+ if ( !ForesterUtil.greatestCommonPrefix( l6, "_" ).equals( "" ) ) {
+ return false;
+ }
+
+ final List<String> l7 = new ArrayList<String>();
+ l7.add( "a,b,c" );
+ l7.add( "a,b,X" );
+ l7.add( "" );
+ l7.add( ",,,,,,,,,," );
+ if ( !ForesterUtil.greatestCommonPrefix( l7, "," ).equals( "" ) ) {
+ return false;
+ }
+
+ final List<String> l8 = new ArrayList<String>();
+ l8.add( "123.304.403.04" );
+ l8.add( "123.304.403.04.02" );
+ l8.add( "123.304.403.03.03" );
+ if ( !ForesterUtil.greatestCommonPrefix( l8, "." ).equals( "123.304.403" ) ) {
+ return false;
+ }
+
+ final List<String> l9 = new ArrayList<String>();
+ l9.add( "123.304.403.04" );
+ l9.add( "123.304.403.04.02" );
+ l9.add( "123.304.402.03.03" );
+ if ( !ForesterUtil.greatestCommonPrefix( l9, "." ).equals( "123.304" ) ) {
+ return false;
+ }
+ return true;
+ }
private static boolean testUTF8ParsingFromFile() {
/**
 * Tells whether a string carries no characters at all.
 *
 * @param s the string to inspect, may be null
 * @return true if s is null or has length zero, false otherwise
 */
final public static boolean isEmpty( final String s ) {
    if ( s == null ) {
        return true;
    }
    return s.length() == 0;
}
-
+
/**
 * Tells whether a string is null or has no characters left after String.trim().
 *
 * @param s the string to inspect, may be null
 * @return true if s is null or its trimmed form has length zero
 */
final public static boolean isEmptyTrimmed( final String s ) {
- if ( s == null ) {
- return true;
- }
- return ( ( s.trim().length() < 1 ) );
+ if ( s == null ) {
+ return true;
+ }
+ return ( ( s.trim().length() < 1 ) );
}
/**
return a.substring( 0, min_length );
}
+ public final static String greatestCommonPrefix( final String a, final String b, final String separator ) {
+ if ( ForesterUtil.isEmpty( separator ) ) {
+ throw new IllegalArgumentException( "separator must not be null or empty" );
+ }
+ final String[] as = a.split( Pattern.quote( separator ) );
+ final String[] bs = b.split( Pattern.quote( separator ) );
+ final int min_length = Math.min( as.length, bs.length );
+ for( int i = 0; i < min_length; ++i ) {
+ if ( !( as[ i ].equals( bs[ i ] ) ) ) {
+ StringBuilder sb = new StringBuilder();
+ boolean first = true;
+ for( int j = 0; j < i; ++j ) {
+ if ( first ) {
+ first = false;
+ }
+ else {
+ sb.append( separator );
+ }
+ sb.append( as[ j ] );
+ }
+ return sb.toString();
+ }
+ }
+ StringBuilder sb = new StringBuilder();
+ boolean first = true;
+ for( int j = 0; j < min_length; ++j ) {
+ if ( first ) {
+ first = false;
+ }
+ else {
+ sb.append( separator );
+ }
+ sb.append( as[ j ] );
+ }
+ return sb.toString();
+ }
+
public final static String greatestCommonPrefix( final List<String> strings ) {
if ( strings == null ) {
- throw new IllegalArgumentException( "list is null" );
+ throw new IllegalArgumentException( "list of strings is null" );
}
if ( strings.isEmpty() ) {
- throw new IllegalArgumentException( "list is empty" );
+ throw new IllegalArgumentException( "list of strings is empty" );
}
String common = strings.get( 0 );
for( int i = 1; i < strings.size(); ++i ) {
return common;
}
+    /**
+     * Returns the separator-delimited prefix common to all strings of a list.
+     * With a null or empty separator this delegates to greatestCommonPrefix( List ).
+     * Otherwise the first element is reduced pairwise against every following element
+     * with greatestCommonPrefix( String, String, String ); a single-element list
+     * therefore yields that element unchanged.
+     *
+     * @param strings the strings to compare
+     * @param separator the literal token separator, may be null or empty
+     * @return the common prefix of all elements
+     * @throws IllegalArgumentException if strings is null or empty
+     */
+    public final static String greatestCommonPrefix( final List<String> strings, final String separator ) {
+        if ( ForesterUtil.isEmpty( separator ) ) {
+            return greatestCommonPrefix( strings );
+        }
+        if ( strings == null ) {
+            throw new IllegalArgumentException( "list of strings is null" );
+        }
+        if ( strings.isEmpty() ) {
+            throw new IllegalArgumentException( "list of strings is empty" );
+        }
+        String prefix = null;
+        boolean seeded = false;
+        for( final String current : strings ) {
+            if ( seeded ) {
+                prefix = greatestCommonPrefix( prefix, current, separator );
+            }
+            else {
+                // The first element is the starting point of the reduction.
+                prefix = current;
+                seeded = true;
+            }
+        }
+        return prefix;
+    }
+
private ForesterUtil() {
}
}