2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25 // --------------------
27 // * Multiple "hits" with different "M" values
28 // * More tests (including multiple children per node), especially on edge cases
29 // * Utilize relevant support values for warnings
31 package org.forester.clade_analysis;
33 import java.util.ArrayList;
34 import java.util.Collections;
35 import java.util.List;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
39 import org.forester.phylogeny.Phylogeny;
40 import org.forester.phylogeny.PhylogenyNode;
41 import org.forester.phylogeny.data.Confidence;
42 import org.forester.util.ForesterUtil;
44 public final class Analysis2 {
// Runs the clade analysis for every node of the phylogeny p that is returned
// by p.getNodes( query ) and accumulates the findings in a new Result2.
//
// For each query node this method:
//  - prints the node name to standard output (debug trace),
//  - rejects the node with an IllegalArgumentException if it is the root or
//    if its parent is the root,
//  - determines the parent (qnode_p) and grandparent (qnode_pp), climbing past
//    internal nodes that have exactly one descendant,
//  - computes the greatest common prefix (split on separator) of external node
//    names below qnode_pp and records it together with the confidence,
//  - does the same for the sibling subtrees one level up and one level down
//    via analyzeSiblings.
//
// The confidence is taken from capture group 1 of query applied to the query
// node name and parsed with Double.parseDouble, so the pattern needs a first
// capture group whose text is a valid double.
//
// Parameters:
//   p         - phylogeny to search
//   query     - pattern selecting the query nodes; group 1 supplies the confidence
//   separator - separator passed on to ForesterUtil.greatestCommonPrefix
//
// Throws IllegalArgumentException for root or child-of-root query nodes and
// for external nodes with empty names; throws IllegalStateException from the
// pattern-mismatch branch below.
//
// NOTE(review): the return statement is not visible in this view; presumably
// res is returned after the loop -- confirm.
46 public static Result2 execute( final Phylogeny p, final Pattern query, final String separator ) {
47 final List<PhylogenyNode> qnodes = p.getNodes( query );
48 final Result2 res = new Result2();
49 for( int i = 0; i < qnodes.size(); ++i ) {
50 final PhylogenyNode qnode = qnodes.get( i );
// Debug trace written straight to standard output.
51 System.out.println( ">>" + qnode.getName() );
// Both a parent and a grandparent are needed below, so the root and its
// direct children cannot serve as query nodes.
52 if ( qnode.isRoot() ) {
53 throw new IllegalArgumentException( "Query " + query + " is root." );
55 if ( qnode.getParent().isRoot() ) {
56 throw new IllegalArgumentException( "Parent of query " + query + " is root." );
58 PhylogenyNode qnode_p = qnode.getParent();
59 PhylogenyNode qnode_pp = qnode.getParent().getParent();
60 //This is to deal with internal nodes with 1 descendant.
// NOTE(review): neither loop checks for the root before calling getParent();
// presumably a chain of single-descendant nodes reaching the root would
// fail here -- confirm.
61 while ( qnode_p.getNumberOfDescendants() == 1 ) {
62 qnode_p = qnode_p.getParent();
64 while ( qnode_pp.getNumberOfDescendants() == 1 ) {
65 qnode_pp = qnode_pp.getParent();
67 // final List<PhylogenyNode> qnode_ext_nodes = new ArrayList<PhylogenyNode>();
// Gather the names of the external nodes below the (adjusted) grandparent.
68 final List<String> qnode_ext_nodes_names = new ArrayList<>();
69 for( final PhylogenyNode qnode_ext_node : qnode_pp.getAllExternalDescendants() ) {
70 final String name = qnode_ext_node.getName();
71 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
72 throw new IllegalArgumentException( "external node(s) with empty names found" );
// NOTE(review): the condition that uses this matcher to decide whether the
// name is added is not visible in this view; presumably names matching the
// query are left out -- confirm.
74 final Matcher m = query.matcher( name );
76 qnode_ext_nodes_names.add( name );
79 //final int lec_ext_nodes = qnode_ext_nodes_names.size();
80 //final int p_ext_nodes = p.getNumberOfExternalNodes() - 1;
81 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
82 // System.out.println( greatest_common_prefix );
// Extract the confidence text from the query node name (capture group 1).
83 Matcher matcher = query.matcher( qnode.getName() );
84 String conf_str = null;
85 if ( matcher.find() ) {
86 conf_str = matcher.group( 1 );
// Presumably the else branch of the find() check above (the else line is not
// visible here): the node was selected by this very pattern, so a mismatch
// is treated as an internal error.
89 throw new IllegalStateException( "pattern did not match -- this should have never happened!" );
91 final double conf = Double.parseDouble( conf_str );
// An empty prefix is recorded as a single question mark (the else line
// separating the two add calls is not visible here).
92 if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) {
93 res.addGreatestCommonPrefix( greatest_common_prefix, conf );
96 res.addGreatestCommonPrefix( "?", conf );
98 //final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator, query, res );
// One level up: prefix shared by the subtrees that are siblings of qnode_p
// under qnode_pp.
99 final String greatest_common_prefix_up = analyzeSiblings( qnode_p, qnode_pp, separator, query );
100 System.out.println( "greatest_common_prefix_up=" + greatest_common_prefix_up + " " + conf);
101 if ( !ForesterUtil.isEmpty( greatest_common_prefix_up) ) {
102 res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf );
105 res.addGreatestCommonPrefixUp( "?", conf );
107 // res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf );
108 //res.addGreatestCommonPrefixUp( greatest_common_prefix_up[ 0 ], 0.1 );
109 // res.setGreatestCommonPrefixUp( greatest_common_prefix_up[ 0 ] );
110 //if ( greatest_common_prefix_up[ 1 ] != null ) {
111 // res.setGreatestCommonCladeUpSubtreeConfidence( greatest_common_prefix_up[ 1 ] );
113 // final String greatest_common_prefix_down[] = analyzeSiblings( qnode, qnode_p, separator,query, res );
// One level down: prefix shared by the subtrees that are siblings of the
// query node itself under qnode_p.
114 final String greatest_common_prefix_down = analyzeSiblings( qnode, qnode_p, separator, query );
115 System.out.println( "greatest_common_prefix_down=" + greatest_common_prefix_down+ " " + conf);
116 if ( !ForesterUtil.isEmpty( greatest_common_prefix_down) ) {
117 res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf );
120 res.addGreatestCommonPrefixDown( "?", conf );
122 //res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf );
123 // res.setGreatestCommonPrefixDown( greatest_common_prefix_down[ 0 ] );
124 // if ( greatest_common_prefix_down[ 1 ] != null ) {
125 // res.setGreatestCommonCladeDownSubtreeConfidence( greatest_common_prefix_down[ 1 ] );
// NOTE(review): what follows appears to be disabled legacy code from an
// earlier single-result version of this method; the closing comment markers
// are not visible in this view.
128 /* for( final PhylogenyNode qnode_ext_node : qnode_ext_nodes ) {
129 String name = qnode_ext_node.getName();
130 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
131 throw new IllegalArgumentException( "external node(s) with empty names found" );
134 if ( !name.equals( query ) ) {
135 qnode_ext_nodes_names.add( name );
138 // if ( greatest_common_prefix.length() < 1 ) {
139 // res.addWarning( "No greatest common prefix" );
140 //res.setGreatestCommonPrefix( "" );
143 // // res.setGreatestCommonPrefix( greatest_common_prefix );
144 // res.addGreatestCommonPrefix( prefix, confidence, separator ); //TODO
146 // if ( qnode_pp.isRoot() ) {
147 // res.addWarning( "Least Encompassing Clade is entire tree" );
149 /* final String conf = obtainConfidence( qnode_pp );
150 if ( conf != null ) {
151 res.setGreatestCommonCladeSubtreeConfidence(conf);
153 /* final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator );
154 res.setGreatestCommonPrefixUp( greatest_common_prefix_up[ 0 ] );
155 if ( greatest_common_prefix_up[ 1 ] != null ) {
156 res.setGreatestCommonCladeUpSubtreeConfidence( greatest_common_prefix_up[ 1 ] );
158 final String greatest_common_prefix_down[] = analyzeSiblings( qnode, qnode_p, separator );
159 res.setGreatestCommonPrefixDown( greatest_common_prefix_down[ 0 ] );
160 if ( greatest_common_prefix_down[ 1 ] != null ) {
161 res.setGreatestCommonCladeDownSubtreeConfidence( greatest_common_prefix_down[ 1 ] );
// Variant of analyzeSiblings that writes its finding into a result object
// instead of returning it. It collects the trimmed names of all external
// nodes found below every descendant of parent except the one at the child
// index of child, computes their greatest common prefix (split on separator)
// and adds that prefix, or a single question mark when the prefix is empty,
// to res together with conf2.
//
// Throws IllegalArgumentException if an external node has an empty name.
//
// NOTE(review): the tail of the parameter list is not visible in this view.
// The body uses res and conf2, and the disabled section refers to query, so
// these are presumably the remaining parameters -- confirm. Judging by the
// OLD suffix this method is presumably superseded by analyzeSiblings.
166 private final static void analyzeSiblingsOLD( final PhylogenyNode child,
167 final PhylogenyNode parent,
168 final String separator,
// Position of child among the descendants of its parent; used to skip the
// subtree of child itself in the loop below.
172 final int child_index = child.getChildNodeIndex();
173 final List<String> ext_nodes_names = new ArrayList<>();
174 final List<PhylogenyNode> descs = parent.getDescendants();
175 // String conf = null;
176 for( int i = 0; i < descs.size(); ++i ) {
177 if ( i != child_index ) {
178 final PhylogenyNode d = descs.get( i );
179 for( final PhylogenyNode n : d.getAllExternalDescendants() ) {
180 final String name = n.getName();
181 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
182 throw new IllegalArgumentException( "external node(s) with empty names found" );
// Names are trimmed here, unlike in analyzeSiblings, which adds them as-is.
184 ext_nodes_names.add( name.trim() );
186 // if ( descs.size() == 2 ) {
187 // conf = obtainConfidence( d );
191 ////////////////////////////////////////////////////////////
// NOTE(review): disabled confidence extraction; the closing comment marker
// is not visible in this view.
192 /* Matcher matcher = query.matcher( child.getName() );
193 String conf_str = null;
194 if ( matcher.find() ) {
195 conf_str = matcher.group( 1 );
198 throw new IllegalStateException( "pattern did not match for \"" + child.getName() + "\" -- this should have never happened!" );
200 ////////////////////////////////////////////////////////////
201 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
202 //final double conf = Double.parseDouble( conf_str );
// An empty prefix is recorded as a single question mark (the else line
// separating the two add calls is not visible here).
203 if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) {
204 res.addGreatestCommonPrefix( greatest_common_prefix, conf2 );
207 res.addGreatestCommonPrefix( "?", conf2 );
// Computes the greatest common prefix of the names of the external nodes that
// sit in the sibling subtrees of child, i.e. below every descendant of parent
// except the one at the child index of child.
//
// Parameters:
//   child     - node whose own subtree is skipped
//   parent    - node whose descendants are scanned
//   separator - separator passed on to ForesterUtil.greatestCommonPrefix
//   query     - pattern matched against each external node name
//
// Returns whatever ForesterUtil.greatestCommonPrefix yields for the collected
// names; the caller in this file treats an empty result as no common prefix.
//
// Throws IllegalArgumentException if an external node has an empty name.
211 private final static String analyzeSiblings( final PhylogenyNode child,
212 final PhylogenyNode parent,
213 final String separator,
214 final Pattern query) {
// Position of child among the descendants of its parent; used to skip the
// subtree of child itself in the loop below.
215 final int child_index = child.getChildNodeIndex();
216 final List<String> ext_nodes_names = new ArrayList<>();
217 final List<PhylogenyNode> descs = parent.getDescendants();
218 // String conf = null;
219 for( int i = 0; i < descs.size(); ++i ) {
220 if ( i != child_index ) {
221 final PhylogenyNode d = descs.get( i );
222 for( final PhylogenyNode n : d.getAllExternalDescendants() ) {
223 final String name = n.getName();
224 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
225 throw new IllegalArgumentException( "external node(s) with empty names found" );
// NOTE(review): the condition that uses this matcher to decide whether the
// name is added is not visible in this view; presumably names matching the
// query are left out -- confirm.
230 final Matcher m = query.matcher( name );
232 ext_nodes_names.add( name );
238 // if ( descs.size() == 2 ) {
239 // conf = obtainConfidence( d );
243 ////////////////////////////////////////////////////////////
// NOTE(review): disabled confidence extraction; the closing comment marker
// is not visible in this view.
244 /* Matcher matcher = query.matcher( child.getName() );
245 String conf_str = null;
246 if ( matcher.find() ) {
247 conf_str = matcher.group( 1 );
250 throw new IllegalStateException( "pattern did not match for \"" + child.getName() + "\" -- this should have never happened!" );
252 ////////////////////////////////////////////////////////////
253 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
254 //final double conf = Double.parseDouble( conf_str );
255 return greatest_common_prefix;
// Builds a textual summary of the confidence values attached to the branch
// data of node n. When the node has at least one confidence, the list is
// sorted and every value different from Confidence.CONFIDENCE_DEFAULT_VALUE
// is appended as its type followed by a colon and the value; the literal
// label confidence is used when the type is empty.
//
// NOTE(review): the end of this method is not visible in this view. The
// result for a node without confidences is presumably null (a disabled
// caller in execute checks for null) -- confirm.
258 private final static String obtainConfidence( final PhylogenyNode n ) {
259 if ( n.getBranchData().getConfidences() != null && n.getBranchData().getConfidences().size() > 0 ) {
260 final List<Confidence> confidences = n.getBranchData().getConfidences();
// NOTE(review): the use of this flag is not visible here; presumably it
// controls a separator between entries -- confirm.
261 boolean not_first = false;
// Sorts the list object returned by getConfidences() in place.
// NOTE(review): if that is the live list of the node, the stored order of
// its confidences changes as a side effect -- confirm.
262 Collections.sort( confidences );
263 final StringBuilder sb = new StringBuilder();
264 for( final Confidence confidence : confidences ) {
265 final double value = confidence.getValue();
// Values equal to the default marker are skipped.
266 if ( value != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
273 sb.append( ( ForesterUtil.isEmpty( confidence.getType() ) ? "confidence: "
274 : confidence.getType() + ": " ) + value );
277 return sb.toString();