From c08b5843d037b629aba2a0c6b8867cbba53284e9 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Fri, 21 Jul 2017 15:30:57 -0700 Subject: [PATCH] first prototype --- .../src/org/forester/application/cladinator.java | 98 ++++++----------- .../src/org/forester/clade_analysis/Analysis.java | 110 ++++++++++++++++++++ .../src/org/forester/clade_analysis/Result.java | 87 ++++++++++++++++ 3 files changed, 230 insertions(+), 65 deletions(-) create mode 100644 forester/java/src/org/forester/clade_analysis/Analysis.java create mode 100644 forester/java/src/org/forester/clade_analysis/Result.java diff --git a/forester/java/src/org/forester/application/cladinator.java b/forester/java/src/org/forester/application/cladinator.java index 9e9c945..13143b0 100644 --- a/forester/java/src/org/forester/application/cladinator.java +++ b/forester/java/src/org/forester/application/cladinator.java @@ -2,8 +2,8 @@ // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // -// Copyright (C) 2008-2009 Christian M. Zmasek -// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// Copyright (C) 2017 Christian M. Zmasek +// Copyright (C) 2017 J. Craig Venter Institute // All rights reserved // // This library is free software; you can redistribute it and/or @@ -20,39 +20,35 @@ // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // -// Contact: phylosoft @ gmail . com +// Contact: phyloxml @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import java.text.DecimalFormat; +import org.forester.clade_analysis.Analysis; +import org.forester.clade_analysis.Result; import org.forester.io.parsers.PhylogenyParser; -import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.util.ParserUtils; -import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; -public class cladinator { +public final class cladinator { - final static private String PRG_NAME = "cladinator"; - final static private String PRG_VERSION = "0.100"; - final static private String PRG_DATE = "170721"; - final static private String PRG_DESC = "clades within clades"; - final static private String E_MAIL = "phyloxml@gmail.com"; - final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; + final static private String PRG_NAME = "cladinator"; + final static private String PRG_VERSION = "0.100"; + final static private String PRG_DATE = "170721"; + final static private String PRG_DESC = "clades within clades -- analysis of pplacer type outputs"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + private final static DecimalFormat df2 = new DecimalFormat( ".##" ); public static void main( final String args[] ) { try { @@ -82,7 +78,7 @@ public class cladinator { print_help(); System.exit( -1 ); } - final List allowed_options = new ArrayList(); + //final List allowed_options = new ArrayList<>(); final File intreefile = cla.getFile( 0 ); final String query = cla.getName( 1 ); System.out.println( "Input tree: " + intreefile ); @@ -97,57 +93,29 @@ public class cladinator { System.out.println( "\nCould not read \"" + intreefile + "\" [" + e.getMessage() + "]\n" ); System.exit( -1 ); } - execute( p, query ); + final Result res = Analysis.execute( p, query ); + System.out.println(); + System.out.println( "Result:" ); + System.out.println( "Greatest common prefix a : " + res.getGreatestCommonPrefix() ); + System.out.println( "Greatest common prefix a (up) : " + res.getGreatestCommonPrefixUp() ); + System.out.println( "Greatest common prefix b (down): " + res.getGreatestCommonPrefixDown() ); + final double lec_ratio = ( 100.0 * res.getLeastEncompassingCladeSize() ) / res.getTreeSize(); + System.out.println( "Least Encompassing Clade has " + res.getLeastEncompassingCladeSize() + + " external nodes (" + df2.format( lec_ratio ) + "% of a total of " + res.getTreeSize() + ")" ); + if ( res.getWarnings().size() > 0 ) { + System.out.println( "Warnings:" ); + for( final String s : res.getWarnings() ) { + System.out.println( s ); + } + } } catch ( final Exception e ) { ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); } } - private static void execute( final Phylogeny p, final String query ) { - final PhylogenyNode qnode = p.getNode( query ); - if ( qnode.isRoot() ) { - throw new IllegalStateException( "Unexpected error: Query " + query - + " is root. This should have never happened" ); - } - if ( qnode.getParent().isRoot() ) { - throw new IllegalStateException( "Unexpected error: Parent of query " + query - + " is root. This should have never happened" ); - } - final PhylogenyNode qnode_pp = qnode.getParent().getParent(); - final List qnode_ext_nodes = qnode_pp.getAllExternalDescendants(); - final int lec_ext_nodes = qnode_ext_nodes.size() - 1; - final int p_ext_nodes = p.getNumberOfExternalNodes() - 1; - final double lec_ratio = ( 100.0 * lec_ext_nodes ) / p_ext_nodes; - final List qnode_ext_nodes_names = new ArrayList(); - for( final PhylogenyNode qnode_ext_node : qnode_ext_nodes ) { - String name = qnode_ext_node.getName(); - if ( ForesterUtil.isEmptyTrimmed( name ) ) { - throw new IllegalArgumentException( "external node(s) with empty names found" ); - } - name = name.trim(); - if ( !name.equals( query ) ) { - qnode_ext_nodes_names.add( name ); - } - } - final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names ); - System.out.println( ); - System.out.println( "Results:"); - if ( greatest_common_prefix.length() < 1 ) { - System.out.println( "WARNING: No greatest common prefix" ); - } - else { - System.out.println( "Greatest common prefix: " + greatest_common_prefix ); - } - if ( qnode_pp.isRoot() ) { - System.out.println( "WARNING: Least Encompassing Clade is entire tree" ); - } - System.out.println( "Least Encompassing Clade has " + lec_ext_nodes + " external nodes (" +lec_ratio + "% of a total of "+ p_ext_nodes +")" ); - } - private final static void print_help() { - System.out.println( "Usage: " + PRG_NAME - + " " ); + System.out.println( "Usage: " + PRG_NAME + " " ); System.out.println(); } } diff --git a/forester/java/src/org/forester/clade_analysis/Analysis.java b/forester/java/src/org/forester/clade_analysis/Analysis.java new file mode 100644 index 0000000..a154b12 --- /dev/null +++ b/forester/java/src/org/forester/clade_analysis/Analysis.java @@ -0,0 +1,110 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2017 Christian M. Zmasek +// Copyright (C) 2017 J. Craig Venter Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phyloxml @ gmail . com +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester +// -------------------- +// TODO +// * Multiple "hits" with different "M" values +// * More tests (including multiple children per node), especially on edge cases +// * Utilize relevant support values for warnings +// * Better system for "clade label creation" (e.g. 1.3.4 + 1.3.6 -> 1.3), use +// specific separator (eg . | _ ) + +package org.forester.clade_analysis; + +import java.util.ArrayList; +import java.util.List; + +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.util.ForesterUtil; + +public final class Analysis { + + public static Result execute( final Phylogeny p, final String query ) { + final PhylogenyNode qnode = p.getNode( query ); + if ( qnode.isRoot() ) { + throw new IllegalStateException( "Unexpected error: Query " + query + + " is root. This should have never happened" ); + } + if ( qnode.getParent().isRoot() ) { + throw new IllegalStateException( "Unexpected error: Parent of query " + query + + " is root. This should have never happened" ); + } + final PhylogenyNode qnode_p = qnode.getParent(); + final PhylogenyNode qnode_pp = qnode.getParent().getParent(); + final List qnode_ext_nodes = qnode_pp.getAllExternalDescendants(); + final int lec_ext_nodes = qnode_ext_nodes.size() - 1; + final int p_ext_nodes = p.getNumberOfExternalNodes() - 1; + final List qnode_ext_nodes_names = new ArrayList<>(); + for( final PhylogenyNode qnode_ext_node : qnode_ext_nodes ) { + String name = qnode_ext_node.getName(); + if ( ForesterUtil.isEmptyTrimmed( name ) ) { + throw new IllegalArgumentException( "external node(s) with empty names found" ); + } + name = name.trim(); + if ( !name.equals( query ) ) { + qnode_ext_nodes_names.add( name ); + } + } + final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names ); + final Result res = new Result(); + if ( greatest_common_prefix.length() < 1 ) { + res.addWarning( "No greatest common prefix" ); + res.setGreatestCommonPrefix( "" ); + } + else { + res.setGreatestCommonPrefix( greatest_common_prefix ); + } + if ( qnode_pp.isRoot() ) { + res.addWarning( "Least Encompassing Clade is entire tree" ); + } + res.setLeastEncompassingCladeSize( lec_ext_nodes ); + res.setTreeSize( p_ext_nodes ); + final String greatest_common_prefix_a = analyzeSiblings( qnode_p, qnode_pp ); + res.setGreatestCommonPrefixUp( greatest_common_prefix_a ); + final String greatest_common_prefix_b = analyzeSiblings( qnode, qnode_p ); + res.setGreatestCommonPrefixDown( greatest_common_prefix_b ); + return res; + } + + private final static String analyzeSiblings( final PhylogenyNode child, final PhylogenyNode parent ) { + final int qnode_p_index = child.getChildNodeIndex(); + final List qnode_ext_nodes_names_a = new ArrayList<>(); + final List descs = parent.getDescendants(); + for( int i = 0; i < descs.size(); ++i ) { + if ( i != qnode_p_index ) { + final PhylogenyNode d = descs.get( i ); + for( final PhylogenyNode n : d.getAllExternalDescendants() ) { + final String name = n.getName(); + if ( ForesterUtil.isEmptyTrimmed( name ) ) { + throw new IllegalArgumentException( "external node(s) with empty names found" ); + } + qnode_ext_nodes_names_a.add( name.trim() ); + } + } + } + final String greatest_common_prefix_a = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names_a ); + return greatest_common_prefix_a; + } +} diff --git a/forester/java/src/org/forester/clade_analysis/Result.java b/forester/java/src/org/forester/clade_analysis/Result.java new file mode 100644 index 0000000..1f3af94 --- /dev/null +++ b/forester/java/src/org/forester/clade_analysis/Result.java @@ -0,0 +1,87 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2017 Christian M. Zmasek +// Copyright (C) 2017 J. Craig Venter Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phyloxml @ gmail . com +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester + +package org.forester.clade_analysis; + +import java.util.ArrayList; +import java.util.List; + +public final class Result { + + private String _greatest_common_prefix = ""; + private String _greatest_common_prefix_up = ""; + private String _greatest_common_prefix_down = ""; + private final List _warnings = new ArrayList<>(); + private int _lec_ext_nodes = 0; + private int _p_ext_nodes = 0; + + void addWarning( final String warning ) { + _warnings.add( warning ); + } + + void setGreatestCommonPrefix( final String greatest_common_prefix ) { + _greatest_common_prefix = greatest_common_prefix; + } + + void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) { + _greatest_common_prefix_up = greatest_common_prefix_up; + } + + void setGreatestCommonPrefixDown( final String greatest_common_prefix_down ) { + _greatest_common_prefix_down = greatest_common_prefix_down; + } + + public String getGreatestCommonPrefix() { + return _greatest_common_prefix; + } + + public String getGreatestCommonPrefixUp() { + return _greatest_common_prefix_up; + } + + public String getGreatestCommonPrefixDown() { + return _greatest_common_prefix_down; + } + + public List getWarnings() { + return _warnings; + } + + void setLeastEncompassingCladeSize( final int lec_ext_nodes ) { + _lec_ext_nodes = lec_ext_nodes; + } + + void setTreeSize( final int p_ext_nodes ) { + _p_ext_nodes = p_ext_nodes; + } + + public int getLeastEncompassingCladeSize() { + return _lec_ext_nodes; + } + + public int getTreeSize() { + return _p_ext_nodes; + } +} -- 1.7.10.2