2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.clade_analysis;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.Comparator;
31 import java.util.HashSet;
32 import java.util.List;
33 import java.util.Map.Entry;
35 import java.util.SortedMap;
36 import java.util.TreeMap;
38 import org.forester.util.ForesterUtil;
40 public final class ResultMulti {
// Separator handed to every Prefix created by the addGreatestCommonPrefix* methods and
// forwarded to the three analyzeGreatestCommonPrefixes* methods.
42 private final String _separator;
// Input prefixes collected through addGreatestCommonPrefix, addGreatestCommonPrefixUp and
// addGreatestCommonPrefixDown respectively; analyze(...) passes them on for processing.
43 private final List<Prefix> _greatest_common_prefixes = new ArrayList<Prefix>();
44 private final List<Prefix> _greatest_common_prefixes_up = new ArrayList<Prefix>();
45 private final List<Prefix> _greatest_common_prefixes_down = new ArrayList<Prefix>();
// Results for the plain input list: _all receives the output of removeLessSpecificPrefixes,
// _collapsed the output of collapse( _all ), _cleaned_spec the output of obtainSpecifics, and
// _has_specifics is set to true when _cleaned_spec is non-empty.
46 private List<Prefix> _all = null;
47 private List<Prefix> _collapsed = null;
48 private List<Prefix> _cleaned_spec = null;
49 private boolean _has_specifics = false;
// The same four results, computed from the "up" input list.
50 private List<Prefix> _all_up = null;
51 private List<Prefix> _collapsed_up = null;
52 private List<Prefix> _cleaned_spec_up = null;
53 private boolean _has_specifics_up = false;
// The same four results, computed from the "down" input list.
54 private List<Prefix> _all_down = null;
55 private List<Prefix> _collapsed_down = null;
56 private List<Prefix> _cleaned_spec_down = null;
57 private boolean _has_specifics_down = false;
// Package-private constructor: stores the caller-supplied separator in _separator.
59 ResultMulti( final String separator ) {
60 _separator = separator;
// NOTE(review): this listing jumps from line 60 to line 65, so the remainder of the constructor
// above and the header that owns the following assignment are not visible. Presumably the
// assignment belongs to a no-argument overload that falls back to
// AnalysisMulti.DEFAULT_SEPARATOR -- confirm against the complete file.
65 _separator = AnalysisMulti.DEFAULT_SEPARATOR;
// NOTE(review): the body of this accessor is not visible in this listing; presumably it
// returns _all_up -- confirm against the complete file.
69 public List<Prefix> getAllMultiHitPrefixesUp() {
// NOTE(review): the body of this accessor is not visible in this listing; presumably it
// returns _collapsed_up -- confirm against the complete file.
73 public List<Prefix> getCollapsedMultiHitPrefixesUp() {
77 public List<Prefix> getSpecificMultiHitPrefixesUp() {
78 return _cleaned_spec_up;
81 public boolean isHasSpecificMultiHitsPrefixesUp() {
82 return _has_specifics_up;
// NOTE(review): the body of this accessor is not visible in this listing; presumably it
// returns _all_down -- confirm against the complete file.
85 public List<Prefix> getAllMultiHitPrefixesDown() {
89 public List<Prefix> getCollapsedMultiHitPrefixesDown() {
90 return _collapsed_down;
93 public List<Prefix> getSpecificMultiHitPrefixesDown() {
94 return _cleaned_spec_down;
97 public boolean isHasSpecificMultiHitsPrefixesDown() {
98 return _has_specifics_down;
// NOTE(review): the body of this accessor is not visible in this listing; presumably it
// returns _all -- confirm against the complete file.
101 public List<Prefix> getAllMultiHitPrefixes() {
// NOTE(review): the body of this accessor is not visible in this listing; presumably it
// returns _collapsed -- confirm against the complete file.
105 public List<Prefix> getCollapsedMultiHitPrefixes() {
109 public List<Prefix> getSpecificMultiHitPrefixes() {
110 return _cleaned_spec;
113 public boolean isHasSpecificMultiHitsPrefixes() {
114 return _has_specifics;
/**
 * Builds a multi-line, human-readable report of the analysis results.
 *
 * The visible code emits, in order: a "Collapsed:" section driven by _collapsed; when
 * _has_specifics is set, a "Specifics:" section driven by _cleaned_spec and a
 * "Collapsed With Specifics:" section that lists under each collapsed prefix the specifics
 * whose prefix string starts with it; a "Collapsed Down:" section when _all_down is
 * non-empty; and a "Collapsed Up:" section when _all_up is non-empty.
 *
 * NOTE(review): several lines of this method (the per-prefix appends inside the loops and
 * all closing braces and comment terminators) are not visible in this listing, so the exact
 * text written per entry cannot be confirmed from here.
 *
 * @return the report text accumulated in the StringBuilder
 */
118 public final String toString() {
119 final StringBuilder sb = new StringBuilder();
120 // sb.append( "Cleaned:" );
121 // sb.append( ForesterUtil.LINE_SEPARATOR );
122 // for( final Prefix prefix : _all ) {
123 // sb.append( prefix );
124 // sb.append( ForesterUtil.LINE_SEPARATOR );
126 // sb.append( ForesterUtil.LINE_SEPARATOR );
127 sb.append( "Collapsed:" );
128 sb.append( ForesterUtil.LINE_SEPARATOR );
// NOTE(review): _collapsed is dereferenced without a null check; it is declared with a null
// initializer, so this relies on the field having been assigned before toString() is called.
129 for( final Prefix prefix : _collapsed ) {
131 sb.append( ForesterUtil.LINE_SEPARATOR );
133 if ( _has_specifics ) {
134 sb.append( ForesterUtil.LINE_SEPARATOR );
135 sb.append( "Specifics:" );
136 sb.append( ForesterUtil.LINE_SEPARATOR );
137 for( final Prefix prefix : _cleaned_spec ) {
139 sb.append( ForesterUtil.LINE_SEPARATOR );
141 sb.append( ForesterUtil.LINE_SEPARATOR );
142 sb.append( "Collapsed With Specifics:" );
143 sb.append( ForesterUtil.LINE_SEPARATOR );
144 for( final Prefix prefix : _collapsed ) {
146 sb.append( ForesterUtil.LINE_SEPARATOR );
// Specifics are grouped under a collapsed prefix by a plain String.startsWith test.
147 for( final Prefix spec : _cleaned_spec ) {
148 if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
149 sb.append( " " + spec );
150 sb.append( ForesterUtil.LINE_SEPARATOR );
// "Down" section: only emitted when _all_down is non-empty.
155 if ( !ForesterUtil.isEmpty( _all_down ) ) {
156 sb.append( ForesterUtil.LINE_SEPARATOR );
157 // sb.append( "Cleaned Down:" );
158 // sb.append( ForesterUtil.LINE_SEPARATOR );
159 // for( final Prefix prefix : _all_down ) {
160 // sb.append( prefix );
161 // sb.append( ForesterUtil.LINE_SEPARATOR );
163 // sb.append( ForesterUtil.LINE_SEPARATOR );
164 sb.append( "Collapsed Down:" );
165 sb.append( ForesterUtil.LINE_SEPARATOR );
166 for( final Prefix prefix : _collapsed_down ) {
168 sb.append( ForesterUtil.LINE_SEPARATOR );
170 /* if ( _has_specifics_down ) {
171 sb.append( ForesterUtil.LINE_SEPARATOR );
172 sb.append( "Specifics Down:" );
173 sb.append( ForesterUtil.LINE_SEPARATOR );
174 for( final Prefix prefix : _cleaned_spec_down ) {
176 sb.append( ForesterUtil.LINE_SEPARATOR );
178 sb.append( ForesterUtil.LINE_SEPARATOR );
179 sb.append( "Collapsed With Specifics Down:" );
180 sb.append( ForesterUtil.LINE_SEPARATOR );
181 for( final Prefix prefix : _collapsed_down ) {
183 sb.append( ForesterUtil.LINE_SEPARATOR );
184 for( final Prefix spec : _cleaned_spec_down ) {
185 if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
186 sb.append( " " + spec );
187 sb.append( ForesterUtil.LINE_SEPARATOR );
193 if ( !ForesterUtil.isEmpty( _all_up ) ) {
194 sb.append( ForesterUtil.LINE_SEPARATOR );
195 // sb.append( "Cleaned Up:" );
196 // sb.append( ForesterUtil.LINE_SEPARATOR );
197 // for( final Prefix prefix : _all_up ) {
198 // sb.append( prefix );
199 // sb.append( ForesterUtil.LINE_SEPARATOR );
201 // sb.append( ForesterUtil.LINE_SEPARATOR );
202 sb.append( "Collapsed Up:" );
203 sb.append( ForesterUtil.LINE_SEPARATOR );
204 for( final Prefix prefix : _collapsed_up ) {
206 sb.append( ForesterUtil.LINE_SEPARATOR );
// NOTE(review): the disabled section below tests _has_specifics while the matching "Down"
// section tests _has_specifics_down; if it is ever re-enabled it presumably should test
// _has_specifics_up -- confirm.
208 /* if ( _has_specifics ) {
209 sb.append( ForesterUtil.LINE_SEPARATOR );
210 sb.append( "Specifics Up:" );
211 sb.append( ForesterUtil.LINE_SEPARATOR );
212 for( final Prefix prefix : _cleaned_spec_up ) {
214 sb.append( ForesterUtil.LINE_SEPARATOR );
216 sb.append( ForesterUtil.LINE_SEPARATOR );
217 sb.append( "Collapsed With Specifics Up:" );
218 sb.append( ForesterUtil.LINE_SEPARATOR );
219 for( final Prefix prefix : _collapsed_up ) {
221 sb.append( ForesterUtil.LINE_SEPARATOR );
222 for( final Prefix spec : _cleaned_spec_up ) {
223 if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
224 sb.append( " " + spec );
225 sb.append( ForesterUtil.LINE_SEPARATOR );
231 return sb.toString();
234 void addGreatestCommonPrefix( final String prefix, final double confidence ) {
235 _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) );
238 void addGreatestCommonPrefixUp( final String prefix_up, final double confidence ) {
239 _greatest_common_prefixes_up.add( new Prefix( prefix_up, confidence, _separator ) );
242 void addGreatestCommonPrefixDown( final String prefix_down, final double confidence ) {
243 _greatest_common_prefixes_down.add( new Prefix( prefix_down, confidence, _separator ) );
/**
 * Runs the plain, "up" and "down" analyses over the prefixes collected so far, passing each
 * one its own input list together with this result's separator.
 *
 * @param cutoff_for_specifics forwarded unchanged as the cutoff argument of all three analyses
 */
246 final void analyze( final double cutoff_for_specifics ) {
// NOTE(review): listing line 247 is not visible here; confirm whether a statement (for example
// a call that clears previous results) precedes the three calls below.
248 analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff_for_specifics );
249 analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, cutoff_for_specifics );
250 analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, cutoff_for_specifics );
253 private final void reset() {
254 _all = new ArrayList<Prefix>();
255 _collapsed = new ArrayList<Prefix>();
256 _cleaned_spec = new ArrayList<Prefix>();
257 _has_specifics = false;
258 _all_up = new ArrayList<Prefix>();
259 _collapsed_up = new ArrayList<Prefix>();
260 _cleaned_spec_up = new ArrayList<Prefix>();
261 _has_specifics_up = false;
262 _all_down = new ArrayList<Prefix>();
263 _collapsed_down = new ArrayList<Prefix>();
264 _cleaned_spec_down = new ArrayList<Prefix>();
265 _has_specifics_down = false;
/**
 * Computes the plain result fields (_all, _collapsed, _cleaned_spec, _has_specifics) from the
 * given prefixes.
 *
 * The input is first expanded by obtainAllPrefixes; nothing further happens when that yields
 * an empty list. Otherwise the expanded list is sorted by confidence, reduced by
 * removeLessSpecificPrefixes into _all, and collapsed into _collapsed.
 *
 * @param greatest_common_prefixes the collected input prefixes
 * @param separator                separator forwarded to obtainAllPrefixes
 * @param cutoff                   cutoff forwarded to obtainSpecifics
 */
268 private final void analyzeGreatestCommonPrefixes( final List<Prefix> greatest_common_prefixes,
269 final String separator,
270 final double cutoff ) {
271 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes, separator );
272 if ( !ForesterUtil.isEmpty( l ) ) {
273 sortPrefixesAccordingToConfidence( l );
274 _all = removeLessSpecificPrefixes( l );
275 _collapsed = collapse( _all );
276 _has_specifics = false;
// NOTE(review): listing line 277 is not visible; the specifics computation below may be
// guarded by a condition on cutoff -- confirm against the complete file.
278 _cleaned_spec = obtainSpecifics( cutoff, _all, _collapsed );
// The flag simply mirrors whether any specifics were found.
279 if ( !ForesterUtil.isEmpty( _cleaned_spec ) ) {
280 _has_specifics = true;
/**
 * Computes the "up" result fields (_all_up, _collapsed_up, _cleaned_spec_up,
 * _has_specifics_up) from the given prefixes.
 *
 * The input is first expanded by obtainAllPrefixes; nothing further happens when that yields
 * an empty list. Otherwise the expanded list is sorted by confidence, reduced by
 * removeLessSpecificPrefixes into _all_up, and collapsed into _collapsed_up.
 *
 * @param greatest_common_prefixes_up the collected "up" input prefixes
 * @param separator                   separator forwarded to obtainAllPrefixes
 * @param cutoff                      cutoff forwarded to obtainSpecifics
 */
286 private final void analyzeGreatestCommonPrefixesUp( final List<Prefix> greatest_common_prefixes_up,
287 final String separator,
288 final double cutoff ) {
289 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes_up, separator );
290 if ( !ForesterUtil.isEmpty( l ) ) {
291 sortPrefixesAccordingToConfidence( l );
292 _all_up = removeLessSpecificPrefixes( l );
293 _collapsed_up = collapse( _all_up );
294 _has_specifics_up = false;
// NOTE(review): listing line 295 is not visible; the specifics computation below may be
// guarded by a condition on cutoff -- confirm against the complete file.
296 _cleaned_spec_up = obtainSpecifics( cutoff, _all_up, _collapsed_up );
// The flag simply mirrors whether any specifics were found.
297 if ( !ForesterUtil.isEmpty( _cleaned_spec_up ) ) {
298 _has_specifics_up = true;
/**
 * Computes the "down" result fields (_all_down, _collapsed_down, _cleaned_spec_down,
 * _has_specifics_down) from the given prefixes.
 *
 * The input is first expanded by obtainAllPrefixes; nothing further happens when that yields
 * an empty list. Otherwise the expanded list is sorted by confidence, reduced by
 * removeLessSpecificPrefixes into _all_down, and collapsed into _collapsed_down.
 *
 * NOTE(review): unlike the plain and "up" variants this method is not declared private --
 * confirm whether the wider visibility is intentional.
 *
 * @param greatest_common_prefixes_down the collected "down" input prefixes
 * @param separator                     separator forwarded to obtainAllPrefixes
 * @param cutoff                        cutoff forwarded to obtainSpecifics
 */
304 final void analyzeGreatestCommonPrefixesDown( final List<Prefix> greatest_common_prefixes_down,
305 final String separator,
306 final double cutoff ) {
307 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes_down, separator );
308 if ( !ForesterUtil.isEmpty( l ) ) {
309 sortPrefixesAccordingToConfidence( l );
310 _all_down = removeLessSpecificPrefixes( l );
311 _collapsed_down = collapse( _all_down );
312 _has_specifics_down = false;
// NOTE(review): listing line 313 is not visible; the specifics computation below may be
// guarded by a condition on cutoff -- confirm against the complete file.
314 _cleaned_spec_down = obtainSpecifics( cutoff, _all_down, _collapsed_down );
// The flag simply mirrors whether any specifics were found.
315 if ( !ForesterUtil.isEmpty( _cleaned_spec_down ) ) {
316 _has_specifics_down = true;
/**
 * Selects "specific" prefixes from the cleaned list.
 *
 * A cleaned prefix is a candidate when its confidence is at least the cutoff and its prefix
 * string does not occur among the collapsed prefixes. The candidates are then compared
 * pairwise to detect candidates that are a string prefix of another, different candidate.
 *
 * NOTE(review): the statements inside both loops and the return statement are not visible in
 * this listing. Presumably candidates are gathered in spec and a candidate is added to
 * cleaned_spec only when no other candidate extends it -- confirm against the complete file.
 *
 * @param cutoff    minimum confidence (inclusive) for a prefix to be considered
 * @param cleaned   prefixes to select from
 * @param collapsed prefixes whose prefix strings are excluded from the selection
 */
322 final static List<Prefix> obtainSpecifics( final double cutoff,
323 final List<Prefix> cleaned,
324 final List<Prefix> collapsed ) {
325 final List<Prefix> cleaned_spec = new ArrayList<>();
// Prefix strings of the collapsed list, for constant-time membership tests below.
326 final Set<String> collapsed_set = new HashSet<>();
327 for( final Prefix prefix : collapsed ) {
328 collapsed_set.add( prefix.getPrefix() );
330 final List<Prefix> spec = new ArrayList<>();
331 for( final Prefix prefix : cleaned ) {
332 if ( ( prefix.getConfidence() >= cutoff ) && !collapsed_set.contains( prefix.getPrefix() ) ) {
336 for( final Prefix o : spec ) {
338 for( final Prefix i : spec ) {
// True when i is a different prefix whose string begins with o's string.
339 if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) ) {
345 cleaned_spec.add( o );
/**
 * Collapses the cleaned prefixes by their first element: a prefix is added to the result, and
 * its confidence to a running sum, when its first element is not yet in the set of seen first
 * elements.
 *
 * NOTE(review): listing line 358 and the return statement are not visible here. Presumably the
 * first element is recorded in firsts at that point and collapsed is returned -- confirm
 * against the complete file.
 *
 * @param cleaned the prefixes to collapse
 * @throws IllegalArgumentException if the summed confidence of the collapsed prefixes is not
 *         considered equal to 1.0 by ForesterUtil.isEqual (third argument 1E-5, presumably a
 *         tolerance -- confirm)
 */
351 private final static List<Prefix> collapse( final List<Prefix> cleaned ) {
352 final List<Prefix> collapsed = new ArrayList<>();
353 final Set<String> firsts = new HashSet<>();
354 double confidence_sum = 0;
355 for( final Prefix prefix : cleaned ) {
356 final String f = prefix.getPrefixFirstElement();
357 if ( !firsts.contains( f ) ) {
359 collapsed.add( prefix );
360 confidence_sum += prefix.getConfidence();
// Sanity check on the accumulated confidence of everything that was kept.
363 if ( !ForesterUtil.isEqual( confidence_sum, 1.0, 1E-5 ) ) {
364 throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" );
370 * This replaces (by way of example)
376 * I.e. it removes less specific prefixes.
// Builds a cleaned copy of l. For every prefix o, the inner loop looks for a different
// prefix i whose string starts with o's string and whose confidence ForesterUtil.isEqual
// reports as equal to o's.
// NOTE(review): the statements executed on a match, the code that fills cleaned, and the
// return statement are not visible in this listing. Presumably o is left out of the result
// when such an i exists -- confirm against the complete file.
// NOTE(review): the test is a plain String.startsWith, which does not take the separator
// into account -- confirm this is intended for prefixes such as "A.1" versus "A.10".
379 private final static List<Prefix> removeLessSpecificPrefixes( final List<Prefix> l ) {
380 final List<Prefix> cleaned = new ArrayList<>();
381 for( final Prefix o : l ) {
383 for( final Prefix i : l ) {
384 if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) )
385 && ForesterUtil.isEqual( i.getConfidence(),
386 o.getConfidence() ) ) {
398 private final static void sortPrefixesAccordingToConfidence( final List<Prefix> l ) {
399 Collections.sort( l, new Comparator<Prefix>() {
402 public int compare( final Prefix x, final Prefix y ) {
403 return compare( x.getConfidence(), y.getConfidence() );
406 private int compare( final double a, final double b ) {
407 return a > b ? -1 : a > b ? 1 : 0;
412 private final static List<Prefix> obtainAllPrefixes( final List<Prefix> greatest_common_prefixes,
413 final String separator ) {
414 final SortedMap<String, Double> map = new TreeMap<>();
415 for( final Prefix prefix : greatest_common_prefixes ) {
416 final List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
417 for( final String p : prefixes ) {
421 for( final String key : map.keySet() ) {
422 for( final Prefix prefix : greatest_common_prefixes ) {
423 if ( prefix.getPrefix().startsWith( key ) ) {
424 map.put( key, map.get( key ) + prefix.getConfidence() );
428 final List<Prefix> l = new ArrayList<>();
429 for( final Entry<String, Double> entry : map.entrySet() ) {
430 l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) );