2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25 // --------------------
27 // * Multiple "hits" with different "M" values
28 // * More tests (including multiple children per node), especially on edge cases
29 // * Utilize relevant support values for warnings
31 package org.forester.clade_analysis;
33 import java.util.ArrayList;
34 import java.util.Collections;
35 import java.util.List;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
39 import org.forester.phylogeny.Phylogeny;
40 import org.forester.phylogeny.PhylogenyNode;
41 import org.forester.phylogeny.data.Confidence;
42 import org.forester.util.ForesterUtil;
44 public final class AnalysisMulti {
46 private final static String UNKNOWN = "?";
47 public final static double DEFAULT_CUTOFF_FOR_SPECIFICS = 0.5;
48 public final static String DEFAULT_SEPARATOR = ".";
49 public final static Pattern DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE = Pattern.compile( ".+#\\d+_M=(.+)" );
52 public static ResultMulti execute( final Phylogeny p ) {
53 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, DEFAULT_CUTOFF_FOR_SPECIFICS );
56 public static ResultMulti execute( final Phylogeny p, final String separator ) {
57 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator , DEFAULT_CUTOFF_FOR_SPECIFICS );
60 public static ResultMulti execute( final Phylogeny p, final String separator, final double cutoff_for_specifics ) {
61 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator , cutoff_for_specifics );
64 public static ResultMulti execute( final Phylogeny p, final double cutoff_for_specifics ) {
65 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR , cutoff_for_specifics );
68 public static ResultMulti execute( final Phylogeny p,
70 final String separator,
71 final double cutoff_for_specifics ) {
72 final List<PhylogenyNode> qnodes = p.getNodes( query );
73 final ResultMulti res = new ResultMulti();
74 for( int i = 0; i < qnodes.size(); ++i ) {
75 final PhylogenyNode qnode = qnodes.get( i );
76 //System.out.println( ">>" + qnode.getName() );
77 if ( qnode.isRoot() ) {
78 throw new IllegalArgumentException( "Query " + query + " is root." );
80 if ( qnode.getParent().isRoot() ) {
81 throw new IllegalArgumentException( "Parent of query " + query + " is root." );
83 PhylogenyNode qnode_p = qnode.getParent();
84 PhylogenyNode qnode_pp = qnode.getParent().getParent();
85 //This is to deal with internal nodes with 1 descendant.
86 while ( qnode_p.getNumberOfDescendants() == 1 ) {
87 qnode_p = qnode_p.getParent();
89 while ( qnode_pp.getNumberOfDescendants() == 1 ) {
90 qnode_pp = qnode_pp.getParent();
92 final List<String> qnode_ext_nodes_names = new ArrayList<>();
93 for( final PhylogenyNode qnode_ext_node : qnode_pp.getAllExternalDescendants() ) {
94 final String name = qnode_ext_node.getName();
95 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
96 throw new IllegalArgumentException( "external node(s) with empty names found" );
98 final Matcher m = query.matcher( name );
100 qnode_ext_nodes_names.add( name );
103 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
104 // System.out.println( greatest_common_prefix );
105 Matcher matcher = query.matcher( qnode.getName() );
106 String conf_str = null;
107 if ( matcher.find() ) {
108 conf_str = matcher.group( 1 );
111 throw new IllegalStateException( "pattern did not match -- this should have never happened!" );
113 final double conf = Double.parseDouble( conf_str );
114 if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) {
115 res.addGreatestCommonPrefix( greatest_common_prefix, conf );
118 res.addGreatestCommonPrefix( UNKNOWN, conf );
120 //final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator, query, res );
121 final String greatest_common_prefix_up = analyzeSiblings( qnode_p, qnode_pp, separator, query );
122 //System.out.println( "greatest_common_prefix_up=" + greatest_common_prefix_up + " " + conf );
123 if ( !ForesterUtil.isEmpty( greatest_common_prefix_up ) ) {
124 res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf );
127 res.addGreatestCommonPrefixUp( UNKNOWN, conf );
129 final String greatest_common_prefix_down = analyzeSiblings( qnode, qnode_p, separator, query );
130 // System.out.println( "greatest_common_prefix_down=" + greatest_common_prefix_down + " " + conf );
131 if ( !ForesterUtil.isEmpty( greatest_common_prefix_down ) ) {
132 res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf );
135 res.addGreatestCommonPrefixDown( UNKNOWN, conf );
138 res.analyze( cutoff_for_specifics );
142 private final static String analyzeSiblings( final PhylogenyNode child,
143 final PhylogenyNode parent,
144 final String separator,
145 final Pattern query ) {
146 final int child_index = child.getChildNodeIndex();
147 final List<String> ext_nodes_names = new ArrayList<>();
148 final List<PhylogenyNode> descs = parent.getDescendants();
149 // String conf = null;
150 for( int i = 0; i < descs.size(); ++i ) {
151 if ( i != child_index ) {
152 final PhylogenyNode d = descs.get( i );
153 for( final PhylogenyNode n : d.getAllExternalDescendants() ) {
154 final String name = n.getName();
155 if ( ForesterUtil.isEmptyTrimmed( name ) ) {
156 throw new IllegalArgumentException( "external node(s) with empty names found" );
158 final Matcher m = query.matcher( name );
160 ext_nodes_names.add( name );
163 // if ( descs.size() == 2 ) {
164 // conf = obtainConfidence( d );
168 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
169 return greatest_common_prefix;
172 private final static String obtainConfidence( final PhylogenyNode n ) {
173 if ( n.getBranchData().getConfidences() != null && n.getBranchData().getConfidences().size() > 0 ) {
174 final List<Confidence> confidences = n.getBranchData().getConfidences();
175 boolean not_first = false;
176 Collections.sort( confidences );
177 final StringBuilder sb = new StringBuilder();
178 for( final Confidence confidence : confidences ) {
179 final double value = confidence.getValue();
180 if ( value != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
187 sb.append( ( ForesterUtil.isEmpty( confidence.getType() ) ? "confidence: "
188 : confidence.getType() + ": " ) + value );
191 return sb.toString();