2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25 // --------------------
27 // * Multiple "hits" with different "M" values
28 // * More tests (including multiple children per node), especially on edge cases
29 // * Utilize relevant support values for warnings
31 package org.forester.clade_analysis;
33 import java.util.ArrayList;
34 import java.util.List;
35 import java.util.SortedMap;
36 import java.util.regex.Matcher;
37 import java.util.regex.Pattern;
39 import org.forester.phylogeny.Phylogeny;
40 import org.forester.phylogeny.PhylogenyNode;
41 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
42 import org.forester.util.ForesterUtil;
43 import org.forester.util.UserException;
45 public final class AnalysisMulti {
47 private final static String UNKNOWN = "?";
48 public final static double DEFAULT_CUTOFF_FOR_SPECIFICS = 0.5;
49 public final static String DEFAULT_SEPARATOR = ".";
50 public final static Pattern DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE = Pattern.compile( "_#\\d+_M=(.+)" );
52 public static ResultMulti execute( final Phylogeny p ) throws UserException {
53 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, DEFAULT_CUTOFF_FOR_SPECIFICS );
56 public static ResultMulti execute( final Phylogeny p, final String separator ) throws UserException {
57 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator, DEFAULT_CUTOFF_FOR_SPECIFICS );
60 public static ResultMulti execute( final Phylogeny p, final String separator, final double cutoff_for_specifics )
61 throws UserException {
62 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator, cutoff_for_specifics );
65 public static ResultMulti execute( final Phylogeny p, final double cutoff_for_specifics ) throws UserException {
66 return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, cutoff_for_specifics );
69 public static ResultMulti execute( final Phylogeny p,
71 final String separator,
72 final double cutoff_for_specifics )
73 throws UserException {
74 if ( ForesterUtil.isEmpty( separator ) ) {
75 throw new IllegalArgumentException( "separator must not be null or empty" );
77 cleanUpExternalNames( p, separator );
78 final List<PhylogenyNode> qnodes = p.getNodes( query );
79 final ResultMulti res = new ResultMulti();
80 res.setQueryNamePrefix( obtainQueryPrefix( query, qnodes ) );
81 res.setTotalNumberOfMatches( qnodes.size() );
82 res.setReferenceTreeNumberOfExternalNodes( p.getNumberOfExternalNodes() - qnodes.size() );
83 for( int i = 0; i < qnodes.size(); ++i ) {
84 final PhylogenyNode qnode = qnodes.get( i );
85 if ( qnode.isRoot() ) {
86 throw new UserException( "query " + query + " is root" );
88 if ( qnode.getParent().isRoot() ) {
89 throw new UserException( "parent of query " + query + " is root" );
91 PhylogenyNode qnode_p = qnode.getParent();
92 PhylogenyNode qnode_pp = qnode.getParent().getParent();
93 //This is to deal with internal nodes with 1 descendant.
94 while ( qnode_p.getNumberOfDescendants() == 1 ) {
95 qnode_p = qnode_p.getParent();
97 while ( qnode_pp.getNumberOfDescendants() == 1 ) {
98 qnode_pp = qnode_pp.getParent();
100 final List<String> qnode_ext_nodes_names = new ArrayList<>();
101 for( final PhylogenyNode qnode_ext_node : qnode_pp.getAllExternalDescendants() ) {
102 final String name = qnode_ext_node.getName();
103 final Matcher m = query.matcher( name );
105 qnode_ext_nodes_names.add( name );
108 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
109 final Matcher matcher = query.matcher( qnode.getName() );
110 String conf_str = null;
111 if ( matcher.find() ) {
112 conf_str = matcher.group( 1 );
115 throw new IllegalStateException( "query pattern does not match [this should have never happened!]" );
117 final double conf = Double.parseDouble( conf_str );
118 if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) {
119 res.addGreatestCommonPrefix( greatest_common_prefix, conf );
122 res.addGreatestCommonPrefix( UNKNOWN, conf );
124 final String greatest_common_prefix_up = analyzeSiblings( qnode_p, qnode_pp, separator, query );
125 if ( !ForesterUtil.isEmpty( greatest_common_prefix_up ) ) {
126 res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf );
129 res.addGreatestCommonPrefixUp( UNKNOWN, conf );
131 final String greatest_common_prefix_down = analyzeSiblings( qnode, qnode_p, separator, query );
132 if ( !ForesterUtil.isEmpty( greatest_common_prefix_down ) ) {
133 res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf );
136 res.addGreatestCommonPrefixDown( UNKNOWN, conf );
139 res.analyze( cutoff_for_specifics );
143 private final static String obtainQueryPrefix( final Pattern query, final List<PhylogenyNode> qnodes )
144 throws UserException {
145 String query_name_prefix = null;
146 for( final PhylogenyNode n : qnodes ) {
147 final String name = n.getName();
148 final Matcher matcher = query.matcher( name );
149 if ( matcher.find() ) {
150 final String prefix = name.substring( 0, matcher.start() );
151 if ( ForesterUtil.isEmpty( prefix ) ) {
152 throw new UserException( "query nodes with empty label prefix found: \"" + prefix + "\"" );
154 if ( query_name_prefix == null ) {
155 query_name_prefix = prefix;
157 else if ( !query_name_prefix.equals( prefix ) ) {
158 throw new UserException( "query nodes with different label prefixes found: \"" + query_name_prefix
159 + "\" and \"" + prefix + "\"" );
163 return query_name_prefix;
166 private final static void cleanUpExternalNames( final Phylogeny p, final String separator ) throws UserException {
167 final Pattern pattern1 = Pattern.compile( "\\Q" + separator + "\\E" + "\\s+" );
168 final Pattern pattern2 = Pattern.compile( "\\s+" + "\\Q" + separator + "\\E" );
169 final Pattern pattern3 = Pattern.compile( "\\Q" + separator + separator + "\\E" );
170 final PhylogenyNodeIterator it = p.iteratorExternalForward();
171 while ( it.hasNext() ) {
172 final PhylogenyNode node = it.next();
173 final String name = node.getName().trim();
174 if ( ForesterUtil.isEmpty( name ) ) {
175 throw new UserException( "external node(s) with empty annotation found" );
177 if ( name.endsWith( separator ) ) {
178 throw new UserException( "illegally formatted annotation found: annotations cannot end with separator: "
181 if ( name.startsWith( separator ) ) {
182 throw new UserException( "illegally formatted annotation found: annotations cannot start with separator: "
185 if ( pattern1.matcher( name ).find() ) {
186 throw new UserException( "illegally formatted annotation found: separator followed by whitespace: "
189 if ( pattern2.matcher( name ).find() ) {
190 throw new UserException( "illegally formatted annotation found: whitespace followed by separator: "
193 if ( pattern3.matcher( name ).find() ) {
194 throw new UserException( "illegally formatted annotation found: empty annotation level: " + name );
196 node.setName( name.replaceAll( "\\s+", " " ) );
200 private final static String analyzeSiblings( final PhylogenyNode child,
201 final PhylogenyNode parent,
202 final String separator,
203 final Pattern query ) {
204 final int child_index = child.getChildNodeIndex();
205 final List<String> ext_nodes_names = new ArrayList<>();
206 final List<PhylogenyNode> descs = parent.getDescendants();
207 for( int i = 0; i < descs.size(); ++i ) {
208 if ( i != child_index ) {
209 final PhylogenyNode d = descs.get( i );
210 for( final PhylogenyNode n : d.getAllExternalDescendants() ) {
211 final String name = n.getName();
212 final Matcher m = query.matcher( name );
214 ext_nodes_names.add( name );
219 final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
220 return greatest_common_prefix;
223 public final static void performMapping( final Pattern pattern,
224 final SortedMap<String, String> map,
226 final boolean verbose )
227 throws UserException {
229 System.out.println();
230 System.out.println( "Id to annotation mapping:" );
232 final PhylogenyNodeIterator it = p.iteratorExternalForward();
233 while ( it.hasNext() ) {
234 final PhylogenyNode node = it.next();
235 final String name = node.getName().trim();
236 if ( ForesterUtil.isEmpty( name ) ) {
237 throw new UserException( "external node with empty name found" );
239 final Matcher m = pattern.matcher( name );
241 if ( !map.containsKey( name ) ) {
242 throw new UserException( "no mapping for \"" + name + "\" found" );
244 node.setName( map.get( name ).trim() );
246 System.out.println( name + " -> " + node.getName() );
251 System.out.println();
255 public final static void performExtraProcessing1( final Pattern query_pattern,
257 final String extra_sep,
259 final String annotation_sep,
260 final boolean verbose )
261 throws UserException {
263 System.out.println();
264 System.out.println( "Extra annotation processing:" );
266 final PhylogenyNodeIterator it = p.iteratorExternalForward();
267 while ( it.hasNext() ) {
268 final PhylogenyNode node = it.next();
269 final String name = node.getName().trim();
270 if ( ForesterUtil.isEmpty( name ) ) {
271 throw new UserException( "external node with empty name found" );
273 if ( !query_pattern.matcher( name ).find() ) {
274 final StringBuilder sb = new StringBuilder();
275 final int last_index = name.lastIndexOf( extra_sep );
276 if ( last_index >= 0 ) {
277 final String annotation = name.substring( last_index + 1 ).trim();
278 if ( ForesterUtil.isEmptyTrimmed( annotation ) ) {
279 throw new UserException( "llegally formatted annotation: " + name );
282 final String extra = name.substring( 0, last_index ).trim();
283 sb.append( annotation );
284 if ( !ForesterUtil.isEmpty( extra ) ) {
285 sb.append( annotation_sep );
290 sb.append( annotation );
292 node.setName( sb.toString() );
294 System.out.println( name + " -> " + node.getName() );
300 System.out.println();
304 public final static void performSpecialProcessing1( final Pattern query_pattern,
306 final String annotation_sep,
307 final Pattern special_pattern,
308 final boolean verbose )
309 throws UserException {
311 System.out.println();
312 System.out.println( "Special annotation processing:" );
314 final PhylogenyNodeIterator it = p.iteratorExternalForward();
315 while ( it.hasNext() ) {
316 final PhylogenyNode node = it.next();
317 final String name = node.getName().trim();
318 if ( ForesterUtil.isEmpty( name ) ) {
319 throw new UserException( "external node with empty name found" );
321 if ( !query_pattern.matcher( name ).find() ) {
322 final Matcher special_m = special_pattern.matcher( name );
323 if ( special_m.matches() ) {
324 final int c = special_m.groupCount();
326 throw new UserException( "illegal special pattern: " + special_pattern
327 + " (need at least one capturing group)" );
329 final StringBuilder sb = new StringBuilder();
330 for( int i = 1; i <= c; ++i ) {
331 final String g = special_m.group( i );
332 if ( !ForesterUtil.isEmpty( g ) ) {
334 sb.append( annotation_sep );
336 sb.append( special_m.group( i ) );
339 node.setName( sb.toString() );
341 System.out.println( name + " -> " + node.getName() );
345 throw new UserException( "illegally formatted annotation for special processing: " + name
346 + " (expected pattern: " + special_pattern + ")" );
351 System.out.println();