2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.clade_analysis;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.forester.util.ForesterUtil;
import org.forester.util.UserException;
41 public final class ResultMulti {
43 private final static double MIN_DIFF = 1E-5;
44 private final String _separator;
45 private final List<Prefix> _greatest_common_prefixes = new ArrayList<>();
46 private final List<Prefix> _greatest_common_prefixes_up = new ArrayList<>();
47 private final List<Prefix> _greatest_common_prefixes_down = new ArrayList<>();
48 private List<Prefix> _all = null;
49 private List<Prefix> _collapsed = null;
50 private List<Prefix> _cleaned_spec = null;
51 private boolean _has_specifics = false;
52 private List<Prefix> _all_up = null;
53 private List<Prefix> _collapsed_up = null;
54 private List<Prefix> _cleaned_spec_up = null;
55 private boolean _has_specifics_up = false;
56 private List<Prefix> _all_down = null;
57 private List<Prefix> _collapsed_down = null;
58 private List<Prefix> _cleaned_spec_down = null;
59 private boolean _has_specifics_down = false;
60 private int _matches = 0;
61 private int _ref_tree_ext_nodes = 0;
62 private String _query_name_prefix = "";
/**
 * Creates an empty result whose prefixes are split on the given separator.
 *
 * @param separator the separator between prefix elements
 * @throws IllegalArgumentException if {@code separator} is null or empty
 */
ResultMulti( final String separator ) {
    if ( ForesterUtil.isEmpty( separator ) ) {
        throw new IllegalArgumentException( "separator must not be null or empty" );
    }
    _separator = separator;
    // Start from empty (non-null) result lists so that the accessors and
    // toString() can be used before analyze() has been called.
    reset();
}

/**
 * Creates an empty result that uses {@code AnalysisMulti.DEFAULT_SEPARATOR}.
 */
ResultMulti() {
    _separator = AnalysisMulti.DEFAULT_SEPARATOR;
    reset();
}
77 public List<Prefix> getAllMultiHitPrefixesUp() {
81 public List<Prefix> getCollapsedMultiHitPrefixesUp() {
85 public List<Prefix> getSpecificMultiHitPrefixesUp() {
86 return _cleaned_spec_up;
89 public boolean isHasSpecificMultiHitsPrefixesUp() {
90 return _has_specifics_up;
93 public List<Prefix> getAllMultiHitPrefixesDown() {
97 public List<Prefix> getCollapsedMultiHitPrefixesDown() {
98 return _collapsed_down;
101 public List<Prefix> getSpecificMultiHitPrefixesDown() {
102 return _cleaned_spec_down;
105 public boolean isHasSpecificMultiHitsPrefixesDown() {
106 return _has_specifics_down;
109 public List<Prefix> getAllMultiHitPrefixes() {
113 public List<Prefix> getCollapsedMultiHitPrefixes() {
117 public List<Prefix> getSpecificMultiHitPrefixes() {
118 return _cleaned_spec;
121 public boolean isHasSpecificMultiHitsPrefixes() {
122 return _has_specifics;
125 public String getQueryNamePrefix() {
126 return _query_name_prefix;
129 public int getNumberOfMatches() {
133 public int getReferenceTreeNumberOfExternalNodes() {
134 return _ref_tree_ext_nodes;
138 public final String toString() {
139 final StringBuilder sb = new StringBuilder();
140 sb.append( "Query: " );
141 sb.append( getQueryNamePrefix() );
142 sb.append( ForesterUtil.LINE_SEPARATOR );
143 sb.append( "Matching Clade(s):" );
144 sb.append( ForesterUtil.LINE_SEPARATOR );
145 for( final Prefix prefix : _collapsed ) {
147 sb.append( ForesterUtil.LINE_SEPARATOR );
149 if ( _has_specifics ) {
150 sb.append( ForesterUtil.LINE_SEPARATOR );
151 sb.append( "Specific-hit(s):" );
152 sb.append( ForesterUtil.LINE_SEPARATOR );
153 for( final Prefix prefix : _cleaned_spec ) {
155 sb.append( ForesterUtil.LINE_SEPARATOR );
157 sb.append( ForesterUtil.LINE_SEPARATOR );
158 sb.append( "Matching Clade(s) with Specific-hit(s):" );
159 sb.append( ForesterUtil.LINE_SEPARATOR );
160 for( final Prefix prefix : _collapsed ) {
162 sb.append( ForesterUtil.LINE_SEPARATOR );
163 for( final Prefix spec : _cleaned_spec ) {
164 if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
165 sb.append( " " + spec );
166 sb.append( ForesterUtil.LINE_SEPARATOR );
171 if ( !ForesterUtil.isEmpty( _all_down ) ) {
172 sb.append( ForesterUtil.LINE_SEPARATOR );
173 sb.append( "Matching Down-tree Bracketing Clade(s):" );
174 sb.append( ForesterUtil.LINE_SEPARATOR );
175 for( final Prefix prefix : _collapsed_down ) {
177 sb.append( ForesterUtil.LINE_SEPARATOR );
180 if ( !ForesterUtil.isEmpty( _all_up ) ) {
181 sb.append( ForesterUtil.LINE_SEPARATOR );
182 sb.append( "Matching Up-tree Bracketing Clade(s):" );
183 sb.append( ForesterUtil.LINE_SEPARATOR );
184 for( final Prefix prefix : _collapsed_up ) {
186 sb.append( ForesterUtil.LINE_SEPARATOR );
189 sb.append( ForesterUtil.LINE_SEPARATOR );
190 sb.append( "Total Number of Matches: " + getNumberOfMatches() + "/" + getReferenceTreeNumberOfExternalNodes() );
191 sb.append( ForesterUtil.LINE_SEPARATOR );
192 return sb.toString();
195 void addGreatestCommonPrefix( final String prefix, final double confidence ) {
196 _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) );
199 void addGreatestCommonPrefixUp( final String prefix_up, final double confidence ) {
200 _greatest_common_prefixes_up.add( new Prefix( prefix_up, confidence, _separator ) );
203 void addGreatestCommonPrefixDown( final String prefix_down, final double confidence ) {
204 _greatest_common_prefixes_down.add( new Prefix( prefix_down, confidence, _separator ) );
207 void setQueryNamePrefix( final String query_name_prefix ) {
208 if ( !ForesterUtil.isEmpty( _query_name_prefix ) ) {
209 throw new IllegalStateException( "illegal attempt to change the query name prefix" );
211 _query_name_prefix = query_name_prefix;
214 void setTotalNumberOfMatches( final int matches ) {
215 if ( _matches > 0 ) {
216 throw new IllegalStateException( "illegal attempt to change the number of matches" );
221 public void setReferenceTreeNumberOfExternalNodes( final int ext_nodes ) {
222 if ( _ref_tree_ext_nodes > 0 ) {
223 throw new IllegalStateException( "illegal attempt to change the number of external nodes" );
225 _ref_tree_ext_nodes = ext_nodes;
228 final void analyze( final double cutoff_for_specifics ) throws UserException {
230 analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff_for_specifics );
231 analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, cutoff_for_specifics );
232 analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, cutoff_for_specifics );
235 private final void reset() {
236 _all = new ArrayList<>();
237 _collapsed = new ArrayList<>();
238 _cleaned_spec = new ArrayList<>();
239 _has_specifics = false;
240 _all_up = new ArrayList<>();
241 _collapsed_up = new ArrayList<>();
242 _cleaned_spec_up = new ArrayList<>();
243 _has_specifics_up = false;
244 _all_down = new ArrayList<>();
245 _collapsed_down = new ArrayList<>();
246 _cleaned_spec_down = new ArrayList<>();
247 _has_specifics_down = false;
250 private final void analyzeGreatestCommonPrefixes( final List<Prefix> greatest_common_prefixes,
251 final String separator,
252 final double cutoff )
253 throws UserException {
254 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes, separator );
255 if ( !ForesterUtil.isEmpty( l ) ) {
256 sortPrefixesAccordingToConfidence( l );
257 _all = removeLessSpecificPrefixes( l, separator );
258 _collapsed = collapse( _all );
259 _has_specifics = false;
261 _cleaned_spec = obtainSpecifics( cutoff, _all, _collapsed, separator );
262 if ( !ForesterUtil.isEmpty( _cleaned_spec ) ) {
263 _has_specifics = true;
269 private final void analyzeGreatestCommonPrefixesUp( final List<Prefix> greatest_common_prefixes_up,
270 final String separator,
271 final double cutoff )
272 throws UserException {
273 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes_up, separator );
274 if ( !ForesterUtil.isEmpty( l ) ) {
275 sortPrefixesAccordingToConfidence( l );
276 _all_up = removeLessSpecificPrefixes( l, separator );
277 _collapsed_up = collapse( _all_up );
278 _has_specifics_up = false;
280 _cleaned_spec_up = obtainSpecifics( cutoff, _all_up, _collapsed_up, separator );
281 if ( !ForesterUtil.isEmpty( _cleaned_spec_up ) ) {
282 _has_specifics_up = true;
288 final void analyzeGreatestCommonPrefixesDown( final List<Prefix> greatest_common_prefixes_down,
289 final String separator,
290 final double cutoff )
291 throws UserException {
292 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes_down, separator );
293 if ( !ForesterUtil.isEmpty( l ) ) {
294 sortPrefixesAccordingToConfidence( l );
295 _all_down = removeLessSpecificPrefixes( l, separator );
296 _collapsed_down = collapse( _all_down );
297 _has_specifics_down = false;
299 _cleaned_spec_down = obtainSpecifics( cutoff, _all_down, _collapsed_down, separator );
300 if ( !ForesterUtil.isEmpty( _cleaned_spec_down ) ) {
301 _has_specifics_down = true;
307 final static List<Prefix> obtainSpecifics( final double cutoff,
308 final List<Prefix> cleaned,
309 final List<Prefix> collapsed,
310 final String separator ) {
311 final List<Prefix> cleaned_spec = new ArrayList<>();
312 final Set<String> collapsed_set = new HashSet<>();
313 for( final Prefix prefix : collapsed ) {
314 collapsed_set.add( prefix.getPrefix() );
316 final List<Prefix> spec = new ArrayList<>();
317 for( final Prefix prefix : cleaned ) {
318 if ( ( prefix.getConfidence() >= cutoff ) && !collapsed_set.contains( prefix.getPrefix() ) ) {
322 for( final Prefix o : spec ) {
324 for( final Prefix i : spec ) {
325 if ( ( !o.getPrefix().equals( i.getPrefix() ) )
326 && ( ForesterUtil.isContainsPrefix( i.getPrefix(), o.getPrefix(), separator ) ) ) {
330 /* if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) ) {
336 cleaned_spec.add( o );
342 private final static List<Prefix> collapse( final List<Prefix> cleaned ) throws UserException {
343 final List<Prefix> collapsed = new ArrayList<>();
344 final Set<String> firsts = new HashSet<>();
345 double confidence_sum = 0;
346 for( final Prefix prefix : cleaned ) {
347 final String f = prefix.getPrefixFirstElement();
348 if ( !firsts.contains( f ) ) {
350 collapsed.add( prefix );
351 confidence_sum += prefix.getConfidence();
354 if ( !ForesterUtil.isEqual( confidence_sum, 1.0, MIN_DIFF ) ) {
355 throw new UserException( "confidences add up to " + confidence_sum + " instead of 1.0" );
361 * This replaces (by way of example)
367 * I.e. it removes less specific prefixes.
370 private final static List<Prefix> removeLessSpecificPrefixes( final List<Prefix> l, final String separator ) {
371 final List<Prefix> cleaned = new ArrayList<>();
372 for( final Prefix o : l ) {
374 for( final Prefix i : l ) {
375 /*if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) )
376 && ForesterUtil.isEqual( i.getConfidence(),
377 o.getConfidence() ) ) {
381 if ( ( !o.getPrefix().equals( i.getPrefix() ) )
382 && ( ForesterUtil.isContainsPrefix( i.getPrefix(), o.getPrefix(), separator ) )
383 && ForesterUtil.isEqual( i.getConfidence(),
384 o.getConfidence() ) ) {
396 private final static void sortPrefixesAccordingToConfidence( final List<Prefix> l ) {
397 Collections.sort( l, new Comparator<Prefix>() {
400 public int compare( final Prefix x, final Prefix y ) {
401 return compare( x.getConfidence(), y.getConfidence() );
404 private int compare( final double a, final double b ) {
405 return a > b ? -1 : a > b ? 1 : 0;
410 private final static List<Prefix> obtainAllPrefixes( final List<Prefix> greatest_common_prefixes,
411 final String separator ) {
412 final SortedMap<String, Double> map = new TreeMap<>();
413 for( final Prefix prefix : greatest_common_prefixes ) {
414 final List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
415 for( final String p : prefixes ) {
419 for( final String key : map.keySet() ) {
420 for( final Prefix prefix : greatest_common_prefixes ) {
421 if ( ForesterUtil.isContainsPrefix( prefix.getPrefix(), key, separator ) ) {
422 map.put( key, map.get( key ) + prefix.getConfidence() );
426 final List<Prefix> l = new ArrayList<>();
427 for( final Entry<String, Double> entry : map.entrySet() ) {
428 l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) );