// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2017 Christian M. Zmasek
+// Copyright (C) 2017 J. Craig Venter Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
-// Contact: phylosoft @ gmail . com
+// Contact: phyloxml @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.application;
import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import java.text.DecimalFormat;
+import org.forester.clade_analysis.Analysis;
+import org.forester.clade_analysis.Result;
import org.forester.io.parsers.PhylogenyParser;
-import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.util.ParserUtils;
-import org.forester.io.writers.PhylogenyWriter;
import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyMethods;
-import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.util.CommandLineArguments;
import org.forester.util.ForesterUtil;
-public class cladinator {
+public final class cladinator {
- final static private String PRG_NAME = "cladinator";
- final static private String PRG_VERSION = "0.100";
- final static private String PRG_DATE = "170721";
- final static private String PRG_DESC = "clades within clades";
- final static private String E_MAIL = "phyloxml@gmail.com";
- final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
- final static private String HELP_OPTION_1 = "help";
- final static private String HELP_OPTION_2 = "h";
+ final static private String PRG_NAME = "cladinator";
+ final static private String PRG_VERSION = "0.100";
+ final static private String PRG_DATE = "170721";
+ final static private String PRG_DESC = "clades within clades -- analysis of pplacer type outputs";
+ final static private String E_MAIL = "phyloxml@gmail.com";
+ final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
+ final static private String HELP_OPTION_1 = "help";
+ final static private String HELP_OPTION_2 = "h";
+ private final static DecimalFormat df2 = new DecimalFormat( ".##" );
public static void main( final String args[] ) {
try {
print_help();
System.exit( -1 );
}
- final List<String> allowed_options = new ArrayList<String>();
+ //final List<String> allowed_options = new ArrayList<>();
final File intreefile = cla.getFile( 0 );
final String query = cla.getName( 1 );
System.out.println( "Input tree: " + intreefile );
System.out.println( "\nCould not read \"" + intreefile + "\" [" + e.getMessage() + "]\n" );
System.exit( -1 );
}
- execute( p, query );
+ final Result res = Analysis.execute( p, query );
+ System.out.println();
+ System.out.println( "Result:" );
+ System.out.println( "Greatest common prefix a : " + res.getGreatestCommonPrefix() );
+ System.out.println( "Greatest common prefix a (up) : " + res.getGreatestCommonPrefixUp() );
+ System.out.println( "Greatest common prefix b (down): " + res.getGreatestCommonPrefixDown() );
+ final double lec_ratio = ( 100.0 * res.getLeastEncompassingCladeSize() ) / res.getTreeSize();
+ System.out.println( "Least Encompassing Clade has " + res.getLeastEncompassingCladeSize()
+ + " external nodes (" + df2.format( lec_ratio ) + "% of a total of " + res.getTreeSize() + ")" );
+ if ( res.getWarnings().size() > 0 ) {
+ System.out.println( "Warnings:" );
+ for( final String s : res.getWarnings() ) {
+ System.out.println( s );
+ }
+ }
}
catch ( final Exception e ) {
ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
}
}
- private static void execute( final Phylogeny p, final String query ) {
- final PhylogenyNode qnode = p.getNode( query );
- if ( qnode.isRoot() ) {
- throw new IllegalStateException( "Unexpected error: Query " + query
- + " is root. This should have never happened" );
- }
- if ( qnode.getParent().isRoot() ) {
- throw new IllegalStateException( "Unexpected error: Parent of query " + query
- + " is root. This should have never happened" );
- }
- final PhylogenyNode qnode_pp = qnode.getParent().getParent();
- final List<PhylogenyNode> qnode_ext_nodes = qnode_pp.getAllExternalDescendants();
- final int lec_ext_nodes = qnode_ext_nodes.size() - 1;
- final int p_ext_nodes = p.getNumberOfExternalNodes() - 1;
- final double lec_ratio = ( 100.0 * lec_ext_nodes ) / p_ext_nodes;
- final List<String> qnode_ext_nodes_names = new ArrayList<String>();
- for( final PhylogenyNode qnode_ext_node : qnode_ext_nodes ) {
- String name = qnode_ext_node.getName();
- if ( ForesterUtil.isEmptyTrimmed( name ) ) {
- throw new IllegalArgumentException( "external node(s) with empty names found" );
- }
- name = name.trim();
- if ( !name.equals( query ) ) {
- qnode_ext_nodes_names.add( name );
- }
- }
- final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names );
- System.out.println( );
- System.out.println( "Results:");
- if ( greatest_common_prefix.length() < 1 ) {
- System.out.println( "WARNING: No greatest common prefix" );
- }
- else {
- System.out.println( "Greatest common prefix: " + greatest_common_prefix );
- }
- if ( qnode_pp.isRoot() ) {
- System.out.println( "WARNING: Least Encompassing Clade is entire tree" );
- }
- System.out.println( "Least Encompassing Clade has " + lec_ext_nodes + " external nodes (" +lec_ratio + "% of a total of "+ p_ext_nodes +")" );
- }
-
+ /**
+ * Prints the one-line usage message ("Usage: " followed by PRG_NAME and the
+ * two expected arguments, a gene tree file and a query) to standard output,
+ * followed by an empty line.
+ */
private final static void print_help() {
- System.out.println( "Usage: " + PRG_NAME
- + " <gene tree file> <query>" );
+ System.out.println( "Usage: " + PRG_NAME + " <gene tree file> <query>" );
System.out.println();
}
}
--- /dev/null
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2017 Christian M. Zmasek
+// Copyright (C) 2017 J. Craig Venter Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phyloxml @ gmail . com
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+// --------------------
+// TODO
+// * Multiple "hits" with different "M" values
+// * More tests (including multiple children per node), especially on edge cases
+// * Utilize relevant support values for warnings
+// * Better system for "clade label creation" (e.g. 1.3.4 + 1.3.6 -> 1.3), using
+// a specific separator (e.g. '.', '|' or '_')
+
+package org.forester.clade_analysis;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.util.ForesterUtil;
+
+/**
+ * Static clade analysis for a single named query node inside a phylogeny.
+ * <p>
+ * For the query node the analysis looks at its parent and its grandparent.
+ * The subtree rooted at the grandparent is treated as the "least encompassing
+ * clade"; greatest common prefixes of external node names are computed for
+ * that clade as a whole, for the siblings of the query's parent ("up") and
+ * for the siblings of the query itself ("down").
+ */
+public final class Analysis {
+
+    /** Not instantiable: this class only hosts static analysis routines. */
+    private Analysis() {
+        // Utility class.
+    }
+
+    /**
+     * Runs the clade analysis for the node named {@code query} in {@code p}.
+     *
+     * @param p     the phylogeny to analyze
+     * @param query the name of the query node; it is looked up with
+     *              {@code p.getNode( query )}
+     * @return a {@link Result} holding the three greatest common prefixes,
+     *         the clade and tree sizes, and any warnings
+     * @throws IllegalArgumentException if {@code query} is null, if no node is
+     *         returned for it, or if an external node with an empty name is
+     *         encountered
+     * @throws IllegalStateException if the query node or its parent is the root
+     */
+    public static Result execute( final Phylogeny p, final String query ) {
+        if ( query == null ) {
+            throw new IllegalArgumentException( "query is null" );
+        }
+        final PhylogenyNode qnode = p.getNode( query );
+        // NOTE(review): getNode() is assumed to be able to return null (e.g. for
+        // an empty phylogeny) -- confirm; the guard is harmless otherwise.
+        if ( qnode == null ) {
+            throw new IllegalArgumentException( "node named \"" + query + "\" not found" );
+        }
+        if ( qnode.isRoot() ) {
+            throw new IllegalStateException( "Unexpected error: Query " + query
+                    + " is root. This should have never happened" );
+        }
+        final PhylogenyNode qnode_p = qnode.getParent();
+        if ( qnode_p.isRoot() ) {
+            throw new IllegalStateException( "Unexpected error: Parent of query " + query
+                    + " is root. This should have never happened" );
+        }
+        final PhylogenyNode qnode_pp = qnode_p.getParent();
+        final List<PhylogenyNode> lec_nodes = qnode_pp.getAllExternalDescendants();
+        // Node names are compared in trimmed form, so the query has to be
+        // trimmed as well; otherwise a query with surrounding whitespace would
+        // never be excluded from its own clade.
+        final String query_name = query.trim();
+        final List<String> lec_names = new ArrayList<>();
+        for( final PhylogenyNode lec_node : lec_nodes ) {
+            final String name = trimmedName( lec_node );
+            if ( !name.equals( query_name ) ) {
+                lec_names.add( name );
+            }
+        }
+        final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( lec_names );
+        final Result res = new Result();
+        if ( greatest_common_prefix.length() < 1 ) {
+            res.addWarning( "No greatest common prefix" );
+        }
+        res.setGreatestCommonPrefix( greatest_common_prefix );
+        if ( qnode_pp.isRoot() ) {
+            res.addWarning( "Least Encompassing Clade is entire tree" );
+        }
+        // Both counts are reduced by one, presumably to discount the query
+        // itself (assumes the query is an external node -- TODO confirm).
+        res.setLeastEncompassingCladeSize( lec_nodes.size() - 1 );
+        res.setTreeSize( p.getNumberOfExternalNodes() - 1 );
+        res.setGreatestCommonPrefixUp( analyzeSiblings( qnode_p, qnode_pp ) );
+        res.setGreatestCommonPrefixDown( analyzeSiblings( qnode, qnode_p ) );
+        return res;
+    }
+
+    /**
+     * Computes the greatest common prefix of the names of all external nodes
+     * found below the siblings of {@code child}, i.e. below every entry of
+     * {@code parent.getDescendants()} except the one at the position reported
+     * by {@code child.getChildNodeIndex()}.
+     *
+     * @param child  the node whose own subtree is left out
+     * @param parent the node whose descendants are scanned; expected to be
+     *               the parent of {@code child}
+     * @return the greatest common prefix of the collected (trimmed) names;
+     *         NOTE(review): if {@code child} has no siblings the name list is
+     *         empty -- verify how ForesterUtil.greatestCommonPrefix handles that
+     * @throws IllegalArgumentException if an external node has an empty name
+     */
+    private static String analyzeSiblings( final PhylogenyNode child, final PhylogenyNode parent ) {
+        final int child_index = child.getChildNodeIndex();
+        final List<String> sibling_names = new ArrayList<>();
+        final List<PhylogenyNode> descs = parent.getDescendants();
+        for( int i = 0; i < descs.size(); ++i ) {
+            if ( i == child_index ) {
+                continue;
+            }
+            for( final PhylogenyNode ext_node : descs.get( i ).getAllExternalDescendants() ) {
+                sibling_names.add( trimmedName( ext_node ) );
+            }
+        }
+        return ForesterUtil.greatestCommonPrefix( sibling_names );
+    }
+
+    /**
+     * Returns the trimmed name of {@code node}.
+     *
+     * @throws IllegalArgumentException if the name is empty according to
+     *         {@code ForesterUtil.isEmptyTrimmed}
+     */
+    private static String trimmedName( final PhylogenyNode node ) {
+        final String name = node.getName();
+        if ( ForesterUtil.isEmptyTrimmed( name ) ) {
+            throw new IllegalArgumentException( "external node(s) with empty names found" );
+        }
+        return name.trim();
+    }
+}
--- /dev/null
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2017 Christian M. Zmasek
+// Copyright (C) 2017 J. Craig Venter Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phyloxml @ gmail . com
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+
+package org.forester.clade_analysis;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Holder for the outcome of one clade analysis: three greatest common
+ * prefixes, two node counts and a list of warnings.
+ * <p>
+ * Instances are filled through package-private mutators and read through the
+ * public accessors. All string values start out as the empty string and both
+ * counts start out as zero.
+ */
+public final class Result {
+
+    // Greatest common prefix values; never reset to null by this class itself.
+    private String             _greatest_common_prefix      = "";
+    private String             _greatest_common_prefix_up   = "";
+    private String             _greatest_common_prefix_down = "";
+    // Warnings in the order in which they were added.
+    private final List<String> _warnings                    = new ArrayList<>();
+    // Size of the least encompassing clade and of the whole tree, as supplied
+    // by the setters below.
+    private int                _lec_ext_nodes               = 0;
+    private int                _p_ext_nodes                 = 0;
+
+    /**
+     * Appends a warning message.
+     *
+     * @param warning the message to record
+     */
+    void addWarning( final String warning ) {
+        _warnings.add( warning );
+    }
+
+    /**
+     * Stores the greatest common prefix.
+     *
+     * @param greatest_common_prefix the value to store
+     */
+    void setGreatestCommonPrefix( final String greatest_common_prefix ) {
+        _greatest_common_prefix = greatest_common_prefix;
+    }
+
+    /**
+     * Stores the "up" greatest common prefix.
+     *
+     * @param greatest_common_prefix_up the value to store
+     */
+    void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) {
+        _greatest_common_prefix_up = greatest_common_prefix_up;
+    }
+
+    /**
+     * Stores the "down" greatest common prefix.
+     *
+     * @param greatest_common_prefix_down the value to store
+     */
+    void setGreatestCommonPrefixDown( final String greatest_common_prefix_down ) {
+        _greatest_common_prefix_down = greatest_common_prefix_down;
+    }
+
+    /**
+     * @return the stored greatest common prefix (empty string if never set)
+     */
+    public String getGreatestCommonPrefix() {
+        return _greatest_common_prefix;
+    }
+
+    /**
+     * @return the stored "up" greatest common prefix (empty string if never set)
+     */
+    public String getGreatestCommonPrefixUp() {
+        return _greatest_common_prefix_up;
+    }
+
+    /**
+     * @return the stored "down" greatest common prefix (empty string if never set)
+     */
+    public String getGreatestCommonPrefixDown() {
+        return _greatest_common_prefix_down;
+    }
+
+    /**
+     * Returns the warnings recorded so far, in insertion order.
+     * <p>
+     * A snapshot copy is returned so that callers cannot alter the warnings
+     * held by this result; changes to the returned list are not reflected here.
+     *
+     * @return a new list containing the recorded warnings, possibly empty
+     */
+    public List<String> getWarnings() {
+        return new ArrayList<>( _warnings );
+    }
+
+    /**
+     * Stores the size of the least encompassing clade.
+     *
+     * @param lec_ext_nodes the number of external nodes to store
+     */
+    void setLeastEncompassingCladeSize( final int lec_ext_nodes ) {
+        _lec_ext_nodes = lec_ext_nodes;
+    }
+
+    /**
+     * Stores the size of the tree.
+     *
+     * @param p_ext_nodes the number of external nodes to store
+     */
+    void setTreeSize( final int p_ext_nodes ) {
+        _p_ext_nodes = p_ext_nodes;
+    }
+
+    /**
+     * @return the stored least encompassing clade size (zero if never set)
+     */
+    public int getLeastEncompassingCladeSize() {
+        return _lec_ext_nodes;
+    }
+
+    /**
+     * @return the stored tree size (zero if never set)
+     */
+    public int getTreeSize() {
+        return _p_ext_nodes;
+    }
+}