2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.clade_analysis;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.Comparator;
31 import java.util.HashSet;
32 import java.util.List;
33 import java.util.Map.Entry;
35 import java.util.SortedMap;
36 import java.util.TreeMap;
38 import org.forester.util.ForesterUtil;
40 public final class Result2 {
42 private final String _separator;
43 private final List<Prefix> _greatest_common_prefixes = new ArrayList<Prefix>();
44 private List<Prefix> _greatest_common_prefixes_up = new ArrayList<Prefix>();
45 private List<Prefix> _greatest_common_prefixes_down = new ArrayList<Prefix>();
46 private final List<String> _warnings = new ArrayList<>();
47 private String _greatest_common_clade_subtree_confidence = "";
48 private String _greatest_common_clade_subtree_confidence_up = "";
49 private String _greatest_common_clade_subtree_confidence_down = "";
50 private List<Prefix> _all = null;
51 private List<Prefix> _collapsed = null;
52 private List<Prefix> _cleaned_spec = null;
53 private boolean _has_specifics = false;
54 private List<Prefix> _all_up = null;
55 private List<Prefix> _collapsed_up = null;
56 private List<Prefix> _cleaned_spec_up = null;
57 private boolean _has_specifics_up = false;
58 private List<Prefix> _all_down = null;
59 private List<Prefix> _collapsed_down = null;
60 private List<Prefix> _cleaned_spec_down = null;
61 private boolean _has_specifics_down = false;
/**
 * Creates a result object that uses the given separator.
 *
 * @param separator stored in {@code _separator}; it is handed to every {@code Prefix}
 *                  this object constructs and to the prefix-analysis helpers
 */
63 public Result2( final String separator ) {
64 _separator = separator;
68 _separator = ".";//TODO make const somewhere
71 public List<Prefix> getAllMultiHitPrefixesUp() {
75 public List<Prefix> getCollapsedMultiHitPrefixesUp() {
/**
 * Returns the "specific" prefixes computed by the up analysis.
 *
 * @return the content of {@code _cleaned_spec_up}; {@code null} until the up analysis has
 *         stored a list there (the field starts out as {@code null} and the analysis may
 *         reset it to {@code null})
 */
79 public List<Prefix> getSpecificMultiHitPrefixesUp() {
80 return _cleaned_spec_up;
/**
 * Tells whether the up analysis produced at least one specific prefix.
 *
 * @return {@code _has_specifics_up}; initially {@code false}, set to {@code true} by the
 *         up analysis only when its list of specifics is non-null and non-empty
 */
83 public boolean isHasSpecificMultiHitsPrefixesUp() {
84 return _has_specifics_up;
87 public List<Prefix> getAllMultiHitPrefixesDown() {
/**
 * Returns the collapsed prefixes computed by the down analysis.
 *
 * @return the content of {@code _collapsed_down}; {@code null} until the down analysis
 *         has run
 */
91 public List<Prefix> getCollapsedMultiHitPrefixesDown() {
92 return _collapsed_down;
/**
 * Returns the "specific" prefixes computed by the down analysis.
 *
 * @return the content of {@code _cleaned_spec_down}; {@code null} until the down analysis
 *         has stored a list there (the field starts out as {@code null} and the analysis
 *         may reset it to {@code null})
 */
95 public List<Prefix> getSpecificMultiHitPrefixesDown() {
96 return _cleaned_spec_down;
/**
 * Tells whether the down analysis produced at least one specific prefix.
 *
 * @return {@code _has_specifics_down}; initially {@code false}, set to {@code true} by the
 *         down analysis only when its list of specifics is non-null and non-empty
 */
99 public boolean isHasSpecificMultiHitsPrefixesDown() {
100 return _has_specifics_down;
103 public List<Prefix> getAllMultiHitPrefixes() {
107 public List<Prefix> getCollapsedMultiHitPrefixes() {
/**
 * Returns the "specific" prefixes computed by the main analysis.
 *
 * @return the content of {@code _cleaned_spec}; {@code null} until the main analysis has
 *         stored a list there (the field starts out as {@code null} and the analysis may
 *         reset it to {@code null})
 */
111 public List<Prefix> getSpecificMultiHitPrefixes() {
112 return _cleaned_spec;
/**
 * Tells whether the main analysis produced at least one specific prefix.
 *
 * @return {@code _has_specifics}; initially {@code false}, set to {@code true} by the
 *         main analysis only when its list of specifics is non-null and non-empty
 */
115 public boolean isHasSpecificMultiHitsPrefixes() {
116 return _has_specifics;
/**
 * Appends a warning message to this result's list of warnings.
 *
 * @param warning the message to record; it is stored as given, without validation
 */
119 void addWarning( final String warning ) {
120 _warnings.add( warning );
/**
 * Records one greatest common prefix for the main analysis.
 *
 * @param prefix     the prefix string
 * @param confidence the confidence attached to this prefix
 */
123 void addGreatestCommonPrefix( final String prefix, final double confidence ) {
// Wrap the values in a Prefix that shares this result's separator.
124 _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) );
/**
 * Records one greatest common prefix for the up analysis.
 *
 * @param prefix_up  the prefix string
 * @param confidence the confidence attached to this prefix
 */
127 void addGreatestCommonPrefixUp( final String prefix_up, final double confidence ) {
// Wrap the values in a Prefix that shares this result's separator.
128 _greatest_common_prefixes_up.add( new Prefix( prefix_up, confidence, _separator ) );
/**
 * Records one greatest common prefix for the down analysis.
 *
 * @param prefix_down the prefix string
 * @param confidence  the confidence attached to this prefix
 */
131 void addGreatestCommonPrefixDown( final String prefix_down, final double confidence ) {
// Wrap the values in a Prefix that shares this result's separator.
132 _greatest_common_prefixes_down.add( new Prefix( prefix_down, confidence, _separator ) );
/**
 * Stores the subtree confidence of the greatest common clade.
 *
 * @param greatest_common_clade_confidence the confidence, kept as a string exactly as given
 */
135 void setGreatestCommonCladeSubtreeConfidence( final String greatest_common_clade_confidence ) {
136 _greatest_common_clade_subtree_confidence = greatest_common_clade_confidence;
/**
 * Stores the subtree confidence of the greatest common clade for the up direction.
 *
 * @param greatest_common_clade_confidence_up the confidence, kept as a string exactly as given
 */
139 void setGreatestCommonCladeUpSubtreeConfidence( final String greatest_common_clade_confidence_up ) {
140 _greatest_common_clade_subtree_confidence_up = greatest_common_clade_confidence_up;
/**
 * Stores the subtree confidence of the greatest common clade for the down direction.
 *
 * @param greatest_common_clade_confidence_down the confidence, kept as a string exactly as given
 */
143 void setGreatestCommonCladeDownSubtreeConfidence( final String greatest_common_clade_confidence_down ) {
144 _greatest_common_clade_subtree_confidence_down = greatest_common_clade_confidence_down;
/**
 * Returns the stored subtree confidence of the greatest common clade.
 *
 * @return the stored string; the empty string if it was never set
 */
147 public String getGreatestCommonCladeSubtreeConfidence() {
148 return _greatest_common_clade_subtree_confidence;
/**
 * Returns the stored subtree confidence of the greatest common clade for the up direction.
 *
 * @return the stored string; the empty string if it was never set
 */
151 public String getGreatestCommonCladeUpSubtreeConfidence() {
152 return _greatest_common_clade_subtree_confidence_up;
/**
 * Returns the stored subtree confidence of the greatest common clade for the down direction.
 *
 * @return the stored string; the empty string if it was never set
 */
155 public String getGreatestCommonCladeDownSubtreeConfidence() {
156 return _greatest_common_clade_subtree_confidence_down;
159 public List<String> getWarnings() {
/**
 * Runs the main, up and down prefix analyses, in that order, with one shared cutoff.
 *
 * @param cutoff_for_specifics cutoff passed unchanged to each of the three analyses
 */
163 public void analyzeGreatestCommonPrefixes( final double cutoff_for_specifics ) {
164 analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff_for_specifics );
165 analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, cutoff_for_specifics );
166 analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, cutoff_for_specifics );
/**
 * Runs the main prefix analysis with a cutoff of {@code -1}.
 * <p>
 * NOTE(review): unlike the overload that takes a cutoff, this variant does not run the
 * up and down analyses, so their results stay untouched -- confirm this is intended.
 */
169 public void analyzeGreatestCommonPrefixes() {
170 analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, -1 );
/**
 * Runs only the up analysis on the recorded up prefixes.
 *
 * @param cutoff_for_specifics cutoff passed unchanged to the up analysis
 */
173 private void analyzeGreatestCommonPrefixesUp( final double cutoff_for_specifics ) {
174 analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, cutoff_for_specifics );
/**
 * Runs only the up analysis on the recorded up prefixes, with a cutoff of {@code -1}.
 */
177 private void analyzeGreatestCommonPrefixesUp() {
178 analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, -1 );
/**
 * Runs only the down analysis on the recorded down prefixes.
 *
 * @param cutoff_for_specifics cutoff passed unchanged to the down analysis
 */
181 private void analyzeGreatestCommonPrefixesDown( final double cutoff_for_specifics ) {
182 analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, cutoff_for_specifics );
/**
 * Runs only the down analysis on the recorded down prefixes, with a cutoff of {@code -1}.
 */
185 private void analyzeGreatestCommonPrefixesDown() {
186 analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, -1 );
/**
 * Performs the main analysis and stores its results in {@code _all}, {@code _collapsed},
 * {@code _cleaned_spec} and {@code _has_specifics}.
 * <p>
 * Pipeline: {@code obtainAllPrefixes}, then {@code sortPrefixesAccordingToConfidence},
 * then {@code removeLessSpecificPrefixes} (result kept in {@code _all}), then
 * {@code collapse} (result kept in {@code _collapsed}), then {@code obtainSpecifics}.
 * <p>
 * NOTE(review): the condition that chooses between computing the specifics and resetting
 * {@code _cleaned_spec} to {@code null} is not visible in this listing -- presumably it
 * depends on {@code cutoff} (callers pass -1 for "no cutoff"); confirm.
 *
 * @param greatest_common_prefixes the prefixes to analyze
 * @param separator                passed to {@code obtainAllPrefixes}
 * @param cutoff                   passed to {@code obtainSpecifics}
 */
189 private final void analyzeGreatestCommonPrefixes( final List<Prefix> greatest_common_prefixes,
190 final String separator,
191 final double cutoff ) {
192 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes, separator );
193 sortPrefixesAccordingToConfidence( l );
194 _all = removeLessSpecificPrefixes( l );
195 _collapsed = collapse( _all );
// Start from "no specifics"; the flag is raised only for a non-empty result below.
196 _has_specifics = false;
198 _cleaned_spec = obtainSpecifics( cutoff, _all, _collapsed );
199 if ( _cleaned_spec != null && _cleaned_spec.size() > 0 ) {
200 _has_specifics = true;
204 _cleaned_spec = null;
/**
 * Performs the up analysis and stores its results in {@code _all_up},
 * {@code _collapsed_up}, {@code _cleaned_spec_up} and {@code _has_specifics_up}.
 * <p>
 * Pipeline: {@code obtainAllPrefixes}, then {@code sortPrefixesAccordingToConfidence},
 * then {@code removeLessSpecificPrefixes} (result kept in {@code _all_up}), then
 * {@code collapse} (result kept in {@code _collapsed_up}), then {@code obtainSpecifics}.
 * <p>
 * NOTE(review): the condition that chooses between computing the specifics and resetting
 * {@code _cleaned_spec_up} to {@code null} is not visible in this listing -- presumably it
 * depends on {@code cutoff} (callers pass -1 for "no cutoff"); confirm.
 *
 * @param greatest_common_prefixes_up the prefixes to analyze
 * @param separator                   passed to {@code obtainAllPrefixes}
 * @param cutoff                      passed to {@code obtainSpecifics}
 */
208 private final void analyzeGreatestCommonPrefixesUp( final List<Prefix> greatest_common_prefixes_up,
209 final String separator,
210 final double cutoff ) {
211 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes_up, separator );
212 sortPrefixesAccordingToConfidence( l );
213 _all_up = removeLessSpecificPrefixes( l );
214 _collapsed_up = collapse( _all_up );
// Start from "no specifics"; the flag is raised only for a non-empty result below.
215 _has_specifics_up = false;
217 _cleaned_spec_up = obtainSpecifics( cutoff, _all_up, _collapsed_up );
218 if ( _cleaned_spec_up != null && _cleaned_spec_up.size() > 0 ) {
219 _has_specifics_up = true;
223 _cleaned_spec_up = null;
/**
 * Performs the down analysis and stores its results in {@code _all_down},
 * {@code _collapsed_down}, {@code _cleaned_spec_down} and {@code _has_specifics_down}.
 * <p>
 * Pipeline: {@code obtainAllPrefixes}, then {@code sortPrefixesAccordingToConfidence},
 * then {@code removeLessSpecificPrefixes} (result kept in {@code _all_down}), then
 * {@code collapse} (result kept in {@code _collapsed_down}), then {@code obtainSpecifics}.
 * <p>
 * NOTE(review): the condition that chooses between computing the specifics and resetting
 * {@code _cleaned_spec_down} to {@code null} is not visible in this listing -- presumably
 * it depends on {@code cutoff} (callers pass -1 for "no cutoff"); confirm.
 *
 * @param greatest_common_prefixes_down the prefixes to analyze
 * @param separator                     passed to {@code obtainAllPrefixes}
 * @param cutoff                        passed to {@code obtainSpecifics}
 */
227 private final void analyzeGreatestCommonPrefixesDown( final List<Prefix> greatest_common_prefixes_down,
228 final String separator,
229 final double cutoff ) {
230 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes_down, separator );
231 sortPrefixesAccordingToConfidence( l );
232 _all_down = removeLessSpecificPrefixes( l );
233 _collapsed_down = collapse( _all_down );
// Start from "no specifics"; the flag is raised only for a non-empty result below.
234 _has_specifics_down = false;
236 _cleaned_spec_down = obtainSpecifics( cutoff, _all_down, _collapsed_down );
237 if ( _cleaned_spec_down != null && _cleaned_spec_down.size() > 0 ) {
238 _has_specifics_down = true;
242 _cleaned_spec_down = null;
/**
 * Derives the "specific" prefixes from the cleaned list.
 * <p>
 * Candidates are the entries of {@code cleaned} whose confidence is at least
 * {@code cutoff} and whose prefix string does not occur in {@code collapsed}. The
 * candidates are then compared pairwise: the test looks for another candidate whose
 * prefix string is different from, but starts with, the current one.
 * <p>
 * NOTE(review): the statements that collect the candidates, react to that pairwise test
 * and return the result are not visible in this listing -- presumably a candidate is
 * dropped when a longer candidate extends it, and {@code cleaned_spec} is returned; confirm.
 *
 * @param cutoff    minimum confidence (inclusive) for a candidate
 * @param cleaned   the prefixes to select from
 * @param collapsed prefixes whose strings are excluded from the candidates
 */
246 private final static List<Prefix> obtainSpecifics( final double cutoff,
247 final List<Prefix> cleaned,
248 final List<Prefix> collapsed ) {
249 final List<Prefix> cleaned_spec = new ArrayList<>();
// Index the collapsed prefix strings for constant-time membership tests.
250 final Set<String> collapsed_set = new HashSet<>();
251 for( final Prefix prefix : collapsed ) {
252 collapsed_set.add( prefix.getPrefix() );
254 final List<Prefix> spec = new ArrayList<>();
255 for( final Prefix prefix : cleaned ) {
256 if ( ( prefix.getConfidence() >= cutoff ) && !collapsed_set.contains( prefix.getPrefix() ) ) {
260 for( final Prefix o : spec ) {
262 for( final Prefix i : spec ) {
// True when i is a different prefix string that begins with o's string.
263 if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) ) {
269 cleaned_spec.add( o );
/**
 * Collapses the cleaned prefixes by their first element.
 * <p>
 * Walks {@code cleaned} in order; a prefix whose first element is not yet in the
 * {@code firsts} set is added to the result and its confidence is added to a running sum.
 * <p>
 * NOTE(review): the statement that records a first element in {@code firsts} and the
 * return statement are not visible in this listing -- presumably each first element is
 * registered when first seen and {@code collapsed} is returned; confirm.
 * <p>
 * The final check compares the confidence sum with 1.0 (tolerance 1E-5), but the only
 * visible reaction is a commented-out exception, so a mismatch appears to go unreported.
 *
 * @param cleaned the prefixes to collapse, in the order they should be considered
 */
275 private final static List<Prefix> collapse( final List<Prefix> cleaned ) {
276 final List<Prefix> collapsed = new ArrayList<>();
277 final Set<String> firsts = new HashSet<>();
278 double confidence_sum = 0;
279 for( final Prefix prefix : cleaned ) {
280 final String f = prefix.getPrefixFirstElement();
281 if ( !firsts.contains( f ) ) {
283 collapsed.add( prefix );
284 confidence_sum += prefix.getConfidence();
287 if ( !ForesterUtil.isEqual( confidence_sum, 1.0, 1E-5 ) ) {
288 // throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" );
294 * This replaces (by way of example)
300 * I.e. it removes less specific prefixes.
/**
 * Filters a prefix list based on a pairwise comparison of its entries.
 * <p>
 * For every entry {@code o} the list is scanned for an entry {@code i} whose prefix
 * string differs from {@code o}'s, starts with {@code o}'s, and whose confidence equals
 * {@code o}'s according to {@code ForesterUtil.isEqual}.
 * <p>
 * NOTE(review): the statements that act on this test and return the result are not
 * visible in this listing -- presumably {@code o} is left out of {@code cleaned} when such
 * a longer prefix with equal confidence exists; confirm.
 *
 * @param l the prefixes to filter
 */
303 private final static List<Prefix> removeLessSpecificPrefixes( final List<Prefix> l ) {
304 final List<Prefix> cleaned = new ArrayList<>();
305 for( final Prefix o : l ) {
307 for( final Prefix i : l ) {
308 if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) )
309 && ForesterUtil.isEqual( i.getConfidence(),
310 o.getConfidence() ) ) {
322 private final static void sortPrefixesAccordingToConfidence( final List<Prefix> l ) {
323 Collections.sort( l, new Comparator<Prefix>() {
326 public int compare( final Prefix x, final Prefix y ) {
327 return compare( x.getConfidence(), y.getConfidence() );
330 private int compare( final double a, final double b ) {
331 return a > b ? -1 : a > b ? 1 : 0;
/**
 * Expands the given prefixes into their sub-prefixes and sums up a confidence for each.
 * <p>
 * Each input prefix string is split with {@code ForesterUtil.spliIntoPrefixes}. Then,
 * for every key of the sorted map, the confidences of all input prefixes whose string
 * starts with that key are added to the key's value. Finally one {@code Prefix} per map
 * entry is created, in the map's key order.
 * <p>
 * NOTE(review): the statement that enters the split prefixes into the map and the return
 * statement are not visible in this listing -- presumably every sub-prefix is registered
 * with an initial value of zero and {@code l} is returned; confirm.
 *
 * @param greatest_common_prefixes the prefixes to expand
 * @param separator                separator used for splitting and for the created prefixes
 */
336 private final static List<Prefix> obtainAllPrefixes( final List<Prefix> greatest_common_prefixes,
337 final String separator ) {
338 final SortedMap<String, Double> map = new TreeMap<>();
339 for( final Prefix prefix : greatest_common_prefixes ) {
340 final List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
341 for( final String p : prefixes ) {
345 for( final String key : map.keySet() ) {
346 for( final Prefix prefix : greatest_common_prefixes ) {
// Plain string match: every input prefix that begins with this key contributes.
347 if ( prefix.getPrefix().startsWith( key ) ) {
348 map.put( key, map.get( key ) + prefix.getConfidence() );
352 final List<Prefix> l = new ArrayList<>();
353 for( final Entry<String, Double> entry : map.entrySet() ) {
354 l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) );
359 public final String toString() {
360 final StringBuilder sb = new StringBuilder();
361 //TODO add all other stuff
362 sb.append( "Cleaned:" );
363 sb.append( ForesterUtil.LINE_SEPARATOR );
364 for( final Prefix prefix : _all ) {
366 sb.append( ForesterUtil.LINE_SEPARATOR );
368 sb.append( ForesterUtil.LINE_SEPARATOR );
369 sb.append( "Collapsed:" );
370 sb.append( ForesterUtil.LINE_SEPARATOR );
371 for( final Prefix prefix : _collapsed ) {
373 sb.append( ForesterUtil.LINE_SEPARATOR );
375 if ( _has_specifics ) {
376 sb.append( ForesterUtil.LINE_SEPARATOR );
377 sb.append( "Specifics:" );
378 sb.append( ForesterUtil.LINE_SEPARATOR );
379 for( final Prefix prefix : _cleaned_spec ) {
381 sb.append( ForesterUtil.LINE_SEPARATOR );
383 sb.append( ForesterUtil.LINE_SEPARATOR );
384 sb.append( "Collapsed With Specifics:" );
385 sb.append( ForesterUtil.LINE_SEPARATOR );
386 for( final Prefix prefix : _collapsed ) {
388 sb.append( ForesterUtil.LINE_SEPARATOR );
389 for( final Prefix spec : _cleaned_spec ) {
390 if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
391 sb.append( " " + spec );
392 sb.append( ForesterUtil.LINE_SEPARATOR );
398 if ( _all_down != null ) {
399 sb.append( ForesterUtil.LINE_SEPARATOR );
400 sb.append( "Cleaned Down:" );
401 sb.append( ForesterUtil.LINE_SEPARATOR );
402 for( final Prefix prefix : _all_down ) {
404 sb.append( ForesterUtil.LINE_SEPARATOR );
406 sb.append( ForesterUtil.LINE_SEPARATOR );
407 sb.append( "Collapsed Down:" );
408 sb.append( ForesterUtil.LINE_SEPARATOR );
409 for( final Prefix prefix : _collapsed_down ) {
411 sb.append( ForesterUtil.LINE_SEPARATOR );
413 if ( _has_specifics_down ) {
414 sb.append( ForesterUtil.LINE_SEPARATOR );
415 sb.append( "Specifics Down:" );
416 sb.append( ForesterUtil.LINE_SEPARATOR );
417 for( final Prefix prefix : _cleaned_spec_down ) {
419 sb.append( ForesterUtil.LINE_SEPARATOR );
421 sb.append( ForesterUtil.LINE_SEPARATOR );
422 sb.append( "Collapsed With Specifics Down:" );
423 sb.append( ForesterUtil.LINE_SEPARATOR );
424 for( final Prefix prefix : _collapsed_down ) {
426 sb.append( ForesterUtil.LINE_SEPARATOR );
427 for( final Prefix spec : _cleaned_spec_down ) {
428 if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
429 sb.append( " " + spec );
430 sb.append( ForesterUtil.LINE_SEPARATOR );
437 if ( _all_up != null ) {
438 sb.append( ForesterUtil.LINE_SEPARATOR );
439 sb.append( "Cleaned Up:" );
440 sb.append( ForesterUtil.LINE_SEPARATOR );
441 for( final Prefix prefix : _all_up ) {
443 sb.append( ForesterUtil.LINE_SEPARATOR );
445 sb.append( ForesterUtil.LINE_SEPARATOR );
446 sb.append( "Collapsed Up:" );
447 sb.append( ForesterUtil.LINE_SEPARATOR );
448 for( final Prefix prefix : _collapsed_up ) {
450 sb.append( ForesterUtil.LINE_SEPARATOR );
452 if ( _has_specifics ) {
453 sb.append( ForesterUtil.LINE_SEPARATOR );
454 sb.append( "Specifics Up:" );
455 sb.append( ForesterUtil.LINE_SEPARATOR );
456 for( final Prefix prefix : _cleaned_spec_up ) {
458 sb.append( ForesterUtil.LINE_SEPARATOR );
460 sb.append( ForesterUtil.LINE_SEPARATOR );
461 sb.append( "Collapsed With Specifics Up:" );
462 sb.append( ForesterUtil.LINE_SEPARATOR );
463 for( final Prefix prefix : _collapsed_up ) {
465 sb.append( ForesterUtil.LINE_SEPARATOR );
466 for( final Prefix spec : _cleaned_spec_up ) {
467 if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
468 sb.append( " " + spec );
469 sb.append( ForesterUtil.LINE_SEPARATOR );
476 return sb.toString();