2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25 // --------------------
27 // * Multiple "hits" with different "M" values
28 // * More tests (including multiple children per node), especially on edge cases
29 // * Utilize relevant support values for warnings
31 package org.forester.clade_analysis;
33 import java.util.ArrayList;
34 import java.util.Collections;
35 import java.util.List;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
39 import org.forester.phylogeny.Phylogeny;
40 import org.forester.phylogeny.PhylogenyNode;
41 import org.forester.phylogeny.data.Confidence;
42 import org.forester.util.ForesterUtil;
44 public final class Analysis2 {
46 public static Result2 execute( final Phylogeny p, final Pattern query, final String separator ) {
47 final List<PhylogenyNode> qnodes = p.getNodes( query );
48 final Result2 res = new Result2();
49 for( int i = 0; i < qnodes.size(); ++i ) {
50 final PhylogenyNode qnode = qnodes.get( i );
51 System.out.println( ">>" + qnode.getName() );
52 if ( qnode.isRoot() ) {
53 throw new IllegalArgumentException( "Query " + query + " is root." );
55 if ( qnode.getParent().isRoot() ) {
56 throw new IllegalArgumentException( "Parent of query " + query + " is root." );
58 PhylogenyNode qnode_p = qnode.getParent();
59 PhylogenyNode qnode_pp = qnode.getParent().getParent();
60 //This is to deal with internal nodes with 1 descendant.
61 while ( qnode_p.getNumberOfDescendants() == 1 ) {
62 qnode_p = qnode_p.getParent();
64 while ( qnode_pp.getNumberOfDescendants() == 1 ) {
65 qnode_pp = qnode_pp.getParent();
67 // final List<PhylogenyNode> qnode_ext_nodes = new ArrayList<PhylogenyNode>();
68 final List<String> qnode_ext_nodes_names = new ArrayList<>();
69 for( final PhylogenyNode qnode_ext_node : qnode_pp.getAllExternalDescendants() ) {
70 final String name = qnode_ext_node.getName();
71 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
72 throw new IllegalArgumentException( "external node(s) with empty names found" );
74 final Matcher m = query.matcher( name );
76 qnode_ext_nodes_names.add( name );
79 final int lec_ext_nodes = qnode_ext_nodes_names.size();
80 final int p_ext_nodes = p.getNumberOfExternalNodes() - 1;
81 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
82 System.out.println( greatest_common_prefix );
83 Matcher matcher = query.matcher( qnode.getName() );
84 String conf_str = null;
85 if ( matcher.find() ) {
86 conf_str = matcher.group( 1 );
89 throw new IllegalStateException( "pattern did not match -- this should have never happened!" );
91 res.setLeastEncompassingCladeSize( lec_ext_nodes );
92 res.setTreeSize( p_ext_nodes );
93 final double conf = Double.parseDouble( conf_str );
94 if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) {
95 res.addGreatestCommonPrefix( greatest_common_prefix, conf );
98 res.addGreatestCommonPrefix( "?", conf );
101 /* for( final PhylogenyNode qnode_ext_node : qnode_ext_nodes ) {
102 String name = qnode_ext_node.getName();
103 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
104 throw new IllegalArgumentException( "external node(s) with empty names found" );
107 if ( !name.equals( query ) ) {
108 qnode_ext_nodes_names.add( name );
111 // if ( greatest_common_prefix.length() < 1 ) {
112 // res.addWarning( "No greatest common prefix" );
113 //res.setGreatestCommonPrefix( "" );
116 // // res.setGreatestCommonPrefix( greatest_common_prefix );
117 // res.addGreatestCommonPrefix( prefix, confidence, separator ); //TODO
119 // if ( qnode_pp.isRoot() ) {
120 // res.addWarning( "Least Encompassing Clade is entire tree" );
122 /* final String conf = obtainConfidence( qnode_pp );
123 if ( conf != null ) {
124 res.setGreatestCommonCladeSubtreeConfidence(conf);
126 /* final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator );
127 res.setGreatestCommonPrefixUp( greatest_common_prefix_up[ 0 ] );
128 if ( greatest_common_prefix_up[ 1 ] != null ) {
129 res.setGreatestCommonCladeUpSubtreeConfidence( greatest_common_prefix_up[ 1 ] );
131 final String greatest_common_prefix_down[] = analyzeSiblings( qnode, qnode_p, separator );
132 res.setGreatestCommonPrefixDown( greatest_common_prefix_down[ 0 ] );
133 if ( greatest_common_prefix_down[ 1 ] != null ) {
134 res.setGreatestCommonCladeDownSubtreeConfidence( greatest_common_prefix_down[ 1 ] );
139 private final static String[] analyzeSiblings( final PhylogenyNode child,
140 final PhylogenyNode parent,
141 final String separator ) {
142 final int child_index = child.getChildNodeIndex();
143 final List<String> ext_nodes_names = new ArrayList<>();
144 final List<PhylogenyNode> descs = parent.getDescendants();
146 for( int i = 0; i < descs.size(); ++i ) {
147 if ( i != child_index ) {
148 final PhylogenyNode d = descs.get( i );
149 for( final PhylogenyNode n : d.getAllExternalDescendants() ) {
150 final String name = n.getName();
151 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
152 throw new IllegalArgumentException( "external node(s) with empty names found" );
154 ext_nodes_names.add( name.trim() );
156 if ( descs.size() == 2 ) {
157 conf = obtainConfidence( d );
161 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
162 return new String[] { greatest_common_prefix, conf };
165 private final static String obtainConfidence( final PhylogenyNode n ) {
166 if ( n.getBranchData().getConfidences() != null && n.getBranchData().getConfidences().size() > 0 ) {
167 final List<Confidence> confidences = n.getBranchData().getConfidences();
168 boolean not_first = false;
169 Collections.sort( confidences );
170 final StringBuilder sb = new StringBuilder();
171 for( final Confidence confidence : confidences ) {
172 final double value = confidence.getValue();
173 if ( value != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
180 sb.append( ( ForesterUtil.isEmpty( confidence.getType() ) ? "confidence: "
181 : confidence.getType() + ": " ) + value );
184 return sb.toString();