2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25 // --------------------
27 // * Multiple "hits" with different "M" values
28 // * More tests (including multiple children per node), especially on edge cases
29 // * Utilize relevant support values for warnings
31 package org.forester.clade_analysis;
33 import java.util.ArrayList;
34 import java.util.Collections;
35 import java.util.List;
36 import java.util.SortedMap;
37 import java.util.regex.Matcher;
38 import java.util.regex.Pattern;
40 import org.forester.phylogeny.Phylogeny;
41 import org.forester.phylogeny.PhylogenyNode;
42 import org.forester.phylogeny.data.Confidence;
43 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
44 import org.forester.util.ForesterUtil;
45 import org.forester.util.UserException;
/**
 * Static helpers for analyzing where query nodes sit in a phylogeny, based on the
 * greatest common prefix of the names of the surrounding external nodes.
 */
47 public final class AnalysisMulti {
// Recorded in place of a greatest common prefix when the computed prefix is empty.
49 private final static String UNKNOWN = "?";
// Cutoff handed to ResultMulti.analyze() by the execute() overloads that take no cutoff.
50 public final static double DEFAULT_CUTOFF_FOR_SPECIFICS = 0.5;
// Separator between annotation levels used by the execute() overloads that take no separator.
51 public final static String DEFAULT_SEPARATOR = ".";
// Default pattern for query node names; group 1 (the text after "_M=") is parsed as a double by execute().
52 public final static Pattern DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE = Pattern.compile( ".+#\\d+_M=(.+)" );
/**
 * Runs the analysis on {@code p} with the default query pattern, the default
 * separator and the default cutoff.
 *
 * @param p the phylogeny to analyze
 * @return whatever the four-argument {@code execute} returns for these defaults
 * @throws UserException propagated from the four-argument {@code execute}
 */
54 public static ResultMulti execute( final Phylogeny p ) throws UserException {
55 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, DEFAULT_CUTOFF_FOR_SPECIFICS );
/**
 * Runs the analysis on {@code p} with a caller-supplied separator, using the
 * default query pattern and the default cutoff.
 *
 * @param p the phylogeny to analyze
 * @param separator the separator between annotation levels in external node names
 * @return whatever the four-argument {@code execute} returns for these arguments
 * @throws UserException propagated from the four-argument {@code execute}
 */
58 public static ResultMulti execute( final Phylogeny p, final String separator ) throws UserException {
59 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator, DEFAULT_CUTOFF_FOR_SPECIFICS );
/**
 * Runs the analysis on {@code p} with a caller-supplied separator and cutoff,
 * using the default query pattern.
 *
 * @param p the phylogeny to analyze
 * @param separator the separator between annotation levels in external node names
 * @param cutoff_for_specifics the cutoff forwarded to the four-argument {@code execute}
 * @return whatever the four-argument {@code execute} returns for these arguments
 * @throws UserException propagated from the four-argument {@code execute}
 */
62 public static ResultMulti execute( final Phylogeny p, final String separator, final double cutoff_for_specifics )
63 throws UserException {
64 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator, cutoff_for_specifics );
/**
 * Runs the analysis on {@code p} with a caller-supplied cutoff, using the
 * default query pattern and the default separator.
 *
 * @param p the phylogeny to analyze
 * @param cutoff_for_specifics the cutoff forwarded to the four-argument {@code execute}
 * @return whatever the four-argument {@code execute} returns for these arguments
 * @throws UserException propagated from the four-argument {@code execute}
 */
67 public static ResultMulti execute( final Phylogeny p, final double cutoff_for_specifics ) throws UserException {
68 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, cutoff_for_specifics );
/**
 * Analyzes every query node of {@code p} and accumulates, per query node, three
 * greatest common prefixes of surrounding external-node names, each paired with
 * the numeric value captured from the query node's own name.
 * <p>
 * For each node returned by {@code p.getNodes( query )} the following are recorded
 * in a {@code ResultMulti}:
 * <ul>
 * <li>the prefix over names collected from the external descendants of {@code qnode_pp},</li>
 * <li>the "up" prefix from {@code analyzeSiblings( qnode_p, qnode_pp, ... )},</li>
 * <li>the "down" prefix from {@code analyzeSiblings( qnode, qnode_p, ... )}.</li>
 * </ul>
 * An empty prefix is recorded as {@code UNKNOWN}. Finally
 * {@code res.analyze( cutoff_for_specifics )} is invoked.
 * <p>
 * NOTE(review): {@code query} is used below as a {@code Pattern}, but its declaration
 * is not visible in this listing (the embedded numbering skips line 72) -- presumably
 * it is the second parameter; confirm. Closing braces, {@code else} branches and the
 * return statement are likewise not visible here.
 *
 * @param p the phylogeny to analyze; its external node names are validated and
 *          whitespace-normalized in place by {@code cleanUpExternalNames}
 * @param separator the separator between annotation levels in external node names
 * @param cutoff_for_specifics the cutoff passed to {@code ResultMulti.analyze}
 * @return a {@code ResultMulti}; presumably the {@code res} populated here -- confirm
 * @throws UserException if a query node is the root or a child of the root, or if
 *         {@code cleanUpExternalNames} rejects an external node name
 * @throws IllegalStateException with "pattern did not match"; presumably raised when
 *         the query node's own name does not match {@code query} -- confirm
 * @throws NumberFormatException if the text captured by group 1 is not a parsable double
 */
71 public static ResultMulti execute( final Phylogeny p,
73 final String separator,
74 final double cutoff_for_specifics )
75 throws UserException {
// Reject malformed external names and normalize whitespace before computing any prefix.
76 cleanUpExternalNames( p, separator );
77 final List<PhylogenyNode> qnodes = p.getNodes( query );
78 final ResultMulti res = new ResultMulti();
79 for( int i = 0; i < qnodes.size(); ++i ) {
80 final PhylogenyNode qnode = qnodes.get( i );
// A query node needs both a parent and a grandparent for the analysis below.
81 if ( qnode.isRoot() ) {
82 throw new UserException( "query " + query + " is root" );
84 if ( qnode.getParent().isRoot() ) {
85 throw new UserException( "parent of query " + query + " is root" );
87 PhylogenyNode qnode_p = qnode.getParent();
88 PhylogenyNode qnode_pp = qnode.getParent().getParent();
89 //This is to deal with internal nodes with 1 descendant.
// NOTE(review): neither loop checks for the root; if the climb reaches a root that
// itself has exactly one descendant, getParent() presumably yields null -- confirm.
// NOTE(review): qnode_pp is taken before qnode_p is advanced, so after both loops
// qnode_p is not guaranteed to be a child of qnode_pp -- confirm this is intended.
90 while ( qnode_p.getNumberOfDescendants() == 1 ) {
91 qnode_p = qnode_p.getParent();
93 while ( qnode_pp.getNumberOfDescendants() == 1 ) {
94 qnode_pp = qnode_pp.getParent();
// Gather names of external nodes below qnode_pp for the greatest common prefix.
96 final List<String> qnode_ext_nodes_names = new ArrayList<>();
97 for( final PhylogenyNode qnode_ext_node : qnode_pp.getAllExternalDescendants() ) {
98 final String name = qnode_ext_node.getName();
// NOTE(review): the guard that uses m is not visible in this listing; presumably
// names matching the query pattern are excluded -- confirm.
99 final Matcher m = query.matcher( name );
101 qnode_ext_nodes_names.add( name );
104 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
// Extract the value carried in the query node's own name (capture group 1).
105 final Matcher matcher = query.matcher( qnode.getName() );
106 String conf_str = null;
107 if ( matcher.find() ) {
108 conf_str = matcher.group( 1 );
111 throw new IllegalStateException( "pattern did not match -- this should have never happened!" );
113 final double conf = Double.parseDouble( conf_str );
// Record each prefix with conf; an empty prefix is stored as UNKNOWN.
114 if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) {
115 res.addGreatestCommonPrefix( greatest_common_prefix, conf );
118 res.addGreatestCommonPrefix( UNKNOWN, conf );
// "Up": prefix over the other descendants of qnode_pp, skipping the one at qnode_p's child index.
120 final String greatest_common_prefix_up = analyzeSiblings( qnode_p, qnode_pp, separator, query );
121 if ( !ForesterUtil.isEmpty( greatest_common_prefix_up ) ) {
122 res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf );
125 res.addGreatestCommonPrefixUp( UNKNOWN, conf );
// "Down": prefix over the other descendants of qnode_p, skipping the one at qnode's child index.
127 final String greatest_common_prefix_down = analyzeSiblings( qnode, qnode_p, separator, query );
128 if ( !ForesterUtil.isEmpty( greatest_common_prefix_down ) ) {
129 res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf );
132 res.addGreatestCommonPrefixDown( UNKNOWN, conf );
135 res.analyze( cutoff_for_specifics );
/**
 * Validates the name of every external node of {@code p} and rewrites it with
 * each run of whitespace collapsed to a single space.
 * <p>
 * A trimmed name is rejected when it is empty, starts or ends with the separator,
 * contains the separator directly followed or preceded by whitespace, or contains
 * two consecutive separators (an empty annotation level).
 * <p>
 * NOTE(review): several of the {@code throw} statements below are cut off in this
 * listing (their continuation lines are not visible).
 *
 * @param p the phylogeny whose external node names are checked and rewritten in place
 * @param separator the separator between annotation levels; it is quoted with
 *        {@code \Q...\E} so it is matched literally
 * @throws UserException if any external node name violates the rules above
 */
139 private final static void cleanUpExternalNames( final Phylogeny p, final String separator ) throws UserException {
// pattern1: separator followed by whitespace.
140 final Pattern pattern1 = Pattern.compile( "\\Q" + separator + "\\E" + "\\s+" );
// pattern2: whitespace followed by separator.
141 final Pattern pattern2 = Pattern.compile( "\\s+" + "\\Q" + separator + "\\E" );
// pattern3: two separators in a row.
142 final Pattern pattern3 = Pattern.compile( "\\Q" + separator + separator + "\\E" );
143 final PhylogenyNodeIterator it = p.iteratorExternalForward();
144 while ( it.hasNext() ) {
145 final PhylogenyNode node = it.next();
146 final String name = node.getName().trim();
147 if ( ForesterUtil.isEmpty( name ) ) {
148 throw new UserException( "external node(s) with empty annotation found" );
150 if ( name.endsWith( separator ) ) {
151 throw new UserException( "illegally formatted annotation found: annotations cannot end with separator: "
154 if ( name.startsWith( separator ) ) {
155 throw new UserException( "illegally formatted annotation found: annotations cannot start with separator: "
158 if ( pattern1.matcher( name ).find() ) {
159 throw new UserException( "illegally formatted annotation found: separator followed by whitespace: "
162 if ( pattern2.matcher( name ).find() ) {
163 throw new UserException( "illegally formatted annotation found: whitespace followed by separator: "
166 if ( pattern3.matcher( name ).find() ) {
167 throw new UserException( "illegally formatted annotation found: empty annotation level: " + name );
// Store the trimmed name with internal whitespace runs collapsed to one space.
169 node.setName( name.replaceAll( "\\s+", " " ) );
/**
 * Computes the greatest common prefix of external-node names found under the
 * descendants of {@code parent}, skipping the descendant at {@code child}'s
 * child index.
 * <p>
 * NOTE(review): the guard that uses the matcher {@code m} is not visible in this
 * listing; presumably names matching {@code query} are left out -- confirm.
 *
 * @param child the node whose child index identifies the descendant of {@code parent} to skip
 * @param parent the node whose other descendants are inspected
 * @param separator the separator passed to {@code ForesterUtil.greatestCommonPrefix}
 * @param query the pattern matched against each collected external node name
 * @return the value of {@code ForesterUtil.greatestCommonPrefix} for the collected names
 */
173 private final static String analyzeSiblings( final PhylogenyNode child,
174 final PhylogenyNode parent,
175 final String separator,
176 final Pattern query ) {
177 final int child_index = child.getChildNodeIndex();
178 final List<String> ext_nodes_names = new ArrayList<>();
179 final List<PhylogenyNode> descs = parent.getDescendants();
180 for( int i = 0; i < descs.size(); ++i ) {
// Skip the subtree at child's own index; only the remaining subtrees contribute names.
181 if ( i != child_index ) {
182 final PhylogenyNode d = descs.get( i );
183 for( final PhylogenyNode n : d.getAllExternalDescendants() ) {
184 final String name = n.getName();
185 final Matcher m = query.matcher( name );
187 ext_nodes_names.add( name );
192 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
193 return greatest_common_prefix;
/**
 * Builds a textual summary of the confidence values attached to the branch data
 * of {@code n}, formatted as "type: value" (or "confidence: value" when the type
 * is empty).
 * <p>
 * Side effect: the list returned by {@code getConfidences()} is sorted in place.
 * <p>
 * NOTE(review): lines between the value check and the append are not visible in
 * this listing (presumably separator handling driven by {@code not_first}), and
 * the result for a node without confidences is not visible either -- confirm.
 *
 * @param n the node whose branch confidences are summarized
 * @return the assembled string when {@code n} has at least one confidence
 */
196 private final static String obtainConfidence( final PhylogenyNode n ) {
197 if ( ( n.getBranchData().getConfidences() != null ) && ( n.getBranchData().getConfidences().size() > 0 ) ) {
198 final List<Confidence> confidences = n.getBranchData().getConfidences();
199 boolean not_first = false;
// Sorts the list obtained from the node itself, not a copy.
200 Collections.sort( confidences );
201 final StringBuilder sb = new StringBuilder();
202 for( final Confidence confidence : confidences ) {
203 final double value = confidence.getValue();
// Values equal to the default confidence value are tested for here.
204 if ( value != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
211 sb.append( ( ForesterUtil.isEmpty( confidence.getType() ) ? "confidence: "
212 : confidence.getType() + ": " ) + value );
215 return sb.toString();
/**
 * Replaces external node names of a phylogeny with the trimmed values looked up
 * in {@code map}, keyed by the node's trimmed current name, optionally printing
 * each replacement to standard output.
 * <p>
 * NOTE(review): the phylogeny {@code p} iterated below is not declared in the
 * visible parameter list (the embedded numbering skips line 222) -- presumably it
 * is the third parameter; confirm. The guards that use {@code m} and
 * {@code verbose} are likewise not visible; presumably names matching
 * {@code pattern} are left untouched and printing happens only when
 * {@code verbose} is set -- confirm.
 *
 * @param pattern the pattern matched against each trimmed external node name
 * @param map the mapping from current node name to replacement annotation
 * @param verbose flag presumably controlling the console output -- confirm
 * @throws UserException if an external node has an empty name, or if {@code map}
 *         has no entry for a name that is looked up
 */
220 public final static void performMapping( final Pattern pattern,
221 final SortedMap<String, String> map,
223 final boolean verbose )
224 throws UserException {
226 System.out.println();
227 System.out.println( "Id to annotation mapping:" );
229 final PhylogenyNodeIterator it = p.iteratorExternalForward();
230 while ( it.hasNext() ) {
231 final PhylogenyNode node = it.next();
232 final String name = node.getName().trim();
233 if ( ForesterUtil.isEmpty( name ) ) {
234 throw new UserException( "external node with empty name found" );
236 final Matcher m = pattern.matcher( name );
// A missing key is an error, not a silent skip.
238 if ( !map.containsKey( name ) ) {
239 throw new UserException( "no mapping for \"" + name + "\" found" );
241 node.setName( map.get( name ).trim() );
243 System.out.println( name + " -> " + node.getName() );
248 System.out.println();
/**
 * Rewrites external node names by splitting each trimmed name at the last
 * occurrence of {@code extra_sep}: the text after the split becomes the
 * annotation, and the text before it ("extra") may be re-attached after the
 * annotation, joined by {@code annotation_sep}.
 * <p>
 * NOTE(review): two parameters (embedded lines 253 and 255) and the branch
 * structure around the two {@code sb.append( annotation )} calls are not visible
 * in this listing; presumably one branch keeps the extra part and the other
 * drops it -- confirm. The guards that use {@code m} and {@code verbose} are not
 * visible either.
 *
 * @param pattern the pattern matched against each trimmed external node name
 * @param extra_sep the separator whose last occurrence splits a name
 * @param annotation_sep the text appended after the annotation when the extra part is non-empty
 * @param verbose flag presumably controlling the console output -- confirm
 * @throws UserException if an external node has an empty name, or if the text
 *         after the last {@code extra_sep} is empty ("illegal format")
 */
252 public final static void performExtraProcessing1( final Pattern pattern,
254 final String extra_sep,
256 final String annotation_sep,
257 final boolean verbose )
258 throws UserException {
260 System.out.println();
261 System.out.println( "Extra annotation processing:" );
263 final PhylogenyNodeIterator it = p.iteratorExternalForward();
264 while ( it.hasNext() ) {
265 final PhylogenyNode node = it.next();
266 final String name = node.getName().trim();
267 if ( ForesterUtil.isEmpty( name ) ) {
268 throw new UserException( "external node with empty name found" );
270 final Matcher m = pattern.matcher( name );
272 final StringBuilder sb = new StringBuilder();
273 final int last_index = name.lastIndexOf( extra_sep );
274 if ( last_index >= 0 ) {
// NOTE(review): this skips exactly one character past last_index; if extra_sep can be
// longer than one character, the remainder of the separator stays in the annotation --
// confirm callers always pass a single-character separator.
275 final String annotation = name.substring( last_index + 1 ).trim();
276 if ( ForesterUtil.isEmptyTrimmed( annotation ) ) {
277 throw new UserException( "illegal format:" + name );
// Everything before the last separator is the "extra" part.
280 final String extra = name.substring( 0, last_index ).trim();
281 sb.append( annotation );
282 if ( !ForesterUtil.isEmpty( extra ) ) {
283 sb.append( annotation_sep );
288 sb.append( annotation );
290 node.setName( sb.toString() );
292 System.out.println( name + " -> " + node.getName() );
298 System.out.println();