2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2017 Christian M. Zmasek
6 // Copyright (C) 2017 J. Craig Venter Institute
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phyloxml @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.clade_analysis;
28 import java.util.ArrayList;
29 import java.util.Collections;
30 import java.util.Comparator;
31 import java.util.HashSet;
32 import java.util.List;
33 import java.util.Map.Entry;
35 import java.util.SortedMap;
36 import java.util.TreeMap;
38 import org.forester.util.ForesterUtil;
/**
 * Collects greatest-common-prefix hits (each with a confidence), warnings,
 * clade/tree sizes and subtree confidence strings, and derives the "all",
 * "collapsed" and "specific" prefix lists from the collected hits.
 */
40 public final class Result2 {
// Separator passed to every Prefix created by this class and to the prefix splitting helper.
42 private final String _separator;
// Raw hits as registered through addGreatestCommonPrefix().
43 private final List<Prefix> _greatest_common_prefixes = new ArrayList<>();
44 private String _greatest_common_prefix_up = "";
45 private String _greatest_common_prefix_down = "";
// Messages appended through addWarning().
46 private final List<String> _warnings = new ArrayList<>();
// Set through setLeastEncompassingCladeSize().
47 private int _lec_ext_nodes = 0;
// Set through setTreeSize().
48 private int _p_ext_nodes = 0;
49 private String _greatest_common_clade_subtree_confidence = "";
50 private String _greatest_common_clade_subtree_confidence_up = "";
51 private String _greatest_common_clade_subtree_confidence_down = "";
// The three lists below stay null until analyzeGreatestCommonPrefixes() has run.
52 private List<Prefix> _all = null;
53 private List<Prefix> _collapsed = null;
54 private List<Prefix> _cleaned_spec = null;
// Set to true by the analysis when _cleaned_spec is non-null and non-empty.
55 private boolean _has_specifics;
/**
 * Creates a result that uses the given prefix separator.
 *
 * @param separator stored as-is and passed to every Prefix created by this object
 */
57 public Result2( final String separator ) {
58 _separator = separator;
// NOTE(review): the closing brace of the constructor above and the header of a
// second constructor are not visible in this view. The assignment below
// presumably belongs to a no-argument constructor that defaults the separator
// to "." -- confirm against the full file.
62 _separator = ".";//TODO make const somewhere
/**
 * NOTE(review): the body is not visible in this view; presumably returns _all,
 * the list assigned by analyzeGreatestCommonPrefixes (null before that) -- confirm.
 */
65 public List<Prefix> getAllMultiHitPrefixes() {
/**
 * NOTE(review): the body is not visible in this view; presumably returns _collapsed,
 * the list assigned by analyzeGreatestCommonPrefixes (null before that) -- confirm.
 */
69 public List<Prefix> getCollapsedMultiHitPrefixes() {
/**
 * NOTE(review): the body is not visible in this view; presumably returns _cleaned_spec,
 * which the analysis may leave null -- confirm.
 */
73 public List<Prefix> getSpecificMultiHitPrefixes() {
/**
 * @return true if the most recent analysis produced a non-empty list of
 *         specific prefixes; false otherwise, including before any analysis
 */
77 public boolean isHasSpecificMultiHitsPrefixes() {
78 return _has_specifics;
/**
 * Appends a warning message to this result.
 *
 * @param warning the message to record
 */
82 void addWarning( final String warning ) {
83 _warnings.add( warning );
/**
 * Registers one prefix hit, wrapped in a new Prefix built with this result's separator.
 *
 * @param prefix     the prefix string
 * @param confidence the confidence attached to this hit
 */
86 void addGreatestCommonPrefix( final String prefix, final double confidence ) {
87 _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) );
/**
 * Stores the value later returned by getGreatestCommonPrefixUp().
 *
 * @param greatest_common_prefix_up the value to store
 */
90 void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) {
91 _greatest_common_prefix_up = greatest_common_prefix_up;
/**
 * Stores the value later returned by getGreatestCommonPrefixDown().
 *
 * @param greatest_common_prefix_down the value to store
 */
94 void setGreatestCommonPrefixDown( final String greatest_common_prefix_down ) {
95 _greatest_common_prefix_down = greatest_common_prefix_down;
/**
 * Stores the value later returned by getGreatestCommonCladeSubtreeConfidence().
 *
 * @param greatest_common_clade_confidence the confidence, kept as a string
 */
98 void setGreatestCommonCladeSubtreeConfidence( final String greatest_common_clade_confidence ) {
99 _greatest_common_clade_subtree_confidence = greatest_common_clade_confidence;
/**
 * Stores the value later returned by getGreatestCommonCladeUpSubtreeConfidence().
 *
 * @param greatest_common_clade_confidence_up the confidence, kept as a string
 */
102 void setGreatestCommonCladeUpSubtreeConfidence( final String greatest_common_clade_confidence_up ) {
103 _greatest_common_clade_subtree_confidence_up = greatest_common_clade_confidence_up;
/**
 * Stores the value later returned by getGreatestCommonCladeDownSubtreeConfidence().
 *
 * @param greatest_common_clade_confidence_down the confidence, kept as a string
 */
106 void setGreatestCommonCladeDownSubtreeConfidence( final String greatest_common_clade_confidence_down ) {
107 _greatest_common_clade_subtree_confidence_down = greatest_common_clade_confidence_down;
110 // public String getGreatestCommonPrefix() {
111 // return _greatest_common_prefix;
/**
 * @return the value stored by setGreatestCommonPrefixUp(), or "" if it was never set
 */
113 public String getGreatestCommonPrefixUp() {
114 return _greatest_common_prefix_up;
/**
 * @return the value stored by setGreatestCommonPrefixDown(), or "" if it was never set
 */
117 public String getGreatestCommonPrefixDown() {
118 return _greatest_common_prefix_down;
/**
 * @return the value stored by setGreatestCommonCladeSubtreeConfidence(), or "" if it was never set
 */
121 public String getGreatestCommonCladeSubtreeConfidence() {
122 return _greatest_common_clade_subtree_confidence;
/**
 * @return the value stored by setGreatestCommonCladeUpSubtreeConfidence(), or "" if it was never set
 */
125 public String getGreatestCommonCladeUpSubtreeConfidence() {
126 return _greatest_common_clade_subtree_confidence_up;
/**
 * @return the value stored by setGreatestCommonCladeDownSubtreeConfidence(), or "" if it was never set
 */
129 public String getGreatestCommonCladeDownSubtreeConfidence() {
130 return _greatest_common_clade_subtree_confidence_down;
/**
 * NOTE(review): the body is not visible in this view; presumably returns _warnings,
 * the list filled by addWarning() -- confirm (and check whether a copy is returned).
 */
133 public List<String> getWarnings() {
/**
 * Stores the value later returned by getLeastEncompassingCladeSize().
 *
 * @param lec_ext_nodes the size to store
 */
137 void setLeastEncompassingCladeSize( final int lec_ext_nodes ) {
138 _lec_ext_nodes = lec_ext_nodes;
/**
 * Stores the tree size in _p_ext_nodes.
 *
 * @param p_ext_nodes the size to store
 */
141 void setTreeSize( final int p_ext_nodes ) {
142 _p_ext_nodes = p_ext_nodes;
/**
 * @return the value stored by setLeastEncompassingCladeSize(), or 0 if it was never set
 */
145 public int getLeastEncompassingCladeSize() {
146 return _lec_ext_nodes;
/**
 * NOTE(review): the body is not visible in this view; presumably returns _p_ext_nodes,
 * the value stored by setTreeSize() -- confirm.
 */
149 public int getTreeSize() {
/**
 * Runs the prefix analysis over all registered prefixes, using this result's separator.
 *
 * @param cutoff confidence threshold forwarded to the private overload, which
 *               passes it on to obtainSpecifics
 */
153 public void analyzeGreatestCommonPrefixes( final double cutoff ) {
154 analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff );
/**
 * Runs the prefix analysis over all registered prefixes with a cutoff of -1.
 * NOTE(review): how -1 is interpreted depends on a statement of the private
 * overload that is not visible in this view; presumably it disables the
 * "specifics" step -- confirm.
 */
157 public void analyzeGreatestCommonPrefixes() {
158 analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, -1 );
/**
 * Recomputes _all, _collapsed, _cleaned_spec and _has_specifics from the given prefixes.
 *
 * @param greatest_common_prefixes the registered prefix hits
 * @param separator                the prefix separator
 * @param cutoff                   confidence threshold handed to obtainSpecifics
 */
161 private final void analyzeGreatestCommonPrefixes( final List<Prefix> greatest_common_prefixes,
162 final String separator,
163 final double cutoff ) {
// Expand the hits into all their sub-prefixes, order them by confidence, then
// drop the less specific entries before collapsing.
164 final List<Prefix> l = obtainAllPrefixes( greatest_common_prefixes, separator );
165 sortPrefixesAccordingToConfidence( l );
166 _all = removeLessSpecificPrefixes( l );
167 _collapsed = collapse( _all );
168 _has_specifics = false;
// NOTE(review): one statement is missing from this view at this point; presumably
// a guard on cutoff that selects between computing the specifics and resetting
// _cleaned_spec to null (the closing braces and "else" are also not visible) -- confirm.
170 _cleaned_spec = obtainSpecifics( cutoff, _all, _collapsed );
171 if ( _cleaned_spec != null && _cleaned_spec.size() > 0 ) {
172 _has_specifics = true;
176 _cleaned_spec = null;
/**
 * Selects "specific" prefixes from the cleaned list: candidates have a confidence
 * of at least cutoff and are not among the collapsed prefixes.
 * NOTE(review): several interior statements and the return are not visible in
 * this view; comments below describe only what the visible lines establish.
 *
 * @param cutoff    minimum confidence (inclusive) for a candidate
 * @param cleaned   prefixes to select from
 * @param collapsed prefixes to exclude
 */
180 private final static List<Prefix> obtainSpecifics( final double cutoff,
181 final List<Prefix> cleaned,
182 final List<Prefix> collapsed ) {
183 final List<Prefix> cleaned_spec = new ArrayList<>();
// Prefix strings of the collapsed entries, for constant-time exclusion below.
184 final Set<String> collapsed_set = new HashSet<>();
185 for( final Prefix prefix : collapsed ) {
186 collapsed_set.add( prefix.getPrefix() );
188 final List<Prefix> spec = new ArrayList<>();
189 for( final Prefix prefix : cleaned ) {
// NOTE(review): the body of this condition is not visible; presumably adds prefix to spec.
190 if ( ( prefix.getConfidence() >= cutoff ) && !collapsed_set.contains( prefix.getPrefix() ) ) {
194 for( final Prefix o : spec ) {
196 for( final Prefix i : spec ) {
// True when another candidate i is a strict string extension of o.
// NOTE(review): startsWith is a plain string test, not separator-aware; unless
// prefixes end with the separator, "A.1" would match "A.10" -- verify.
197 if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) ) {
// NOTE(review): the condition guarding this add is not visible; presumably o is
// kept only when no more specific candidate was found -- confirm.
203 cleaned_spec.add( o );
/**
 * Builds a list from the cleaned prefixes based on their first prefix element
 * and checks that the confidences of the kept prefixes sum to 1.0.
 * NOTE(review): the statement that records a first element in "firsts" and the
 * return are not visible in this view; presumably only the first prefix seen
 * per first element is kept and "collapsed" is returned -- confirm.
 *
 * @param cleaned prefixes to collapse
 * @throws IllegalArgumentException if the summed confidence differs from 1.0
 *                                  (checked via ForesterUtil.isEqual with 1E-5)
 */
209 private final static List<Prefix> collapse( final List<Prefix> cleaned ) {
210 final List<Prefix> collapsed = new ArrayList<>();
211 final Set<String> firsts = new HashSet<>();
212 double confidence_sum = 0;
213 for( final Prefix prefix : cleaned ) {
214 final String f = prefix.getPrefixFirstElement();
215 if ( !firsts.contains( f ) ) {
217 collapsed.add( prefix );
218 confidence_sum += prefix.getConfidence();
221 if ( !ForesterUtil.isEqual( confidence_sum, 1.0, 1E-5 ) ) {
222 throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" );
228 * This replaces (by way of example)
234 * I.e. it removes less specific prefixes.
// Removes less specific prefixes from l.
// NOTE(review): most of the body (what happens on a match, how "cleaned" is
// filled, and the return) is not visible in this view.
237 private final static List<Prefix> removeLessSpecificPrefixes( final List<Prefix> l ) {
238 final List<Prefix> cleaned = new ArrayList<>();
239 for( final Prefix o : l ) {
241 for( final Prefix i : l ) {
// Matches when i is a strict string extension of o AND both carry equal confidence.
// NOTE(review): startsWith is not separator-aware; unless prefixes end with the
// separator, "A.1" would match "A.10" -- verify.
242 if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) )
243 && ForesterUtil.isEqual( i.getConfidence(),
244 o.getConfidence() ) ) {
256 private static void sortPrefixesAccordingToConfidence( final List<Prefix> l ) {
257 Collections.sort( l, new Comparator<Prefix>() {
260 public int compare( final Prefix x, final Prefix y ) {
261 return compare( x.getConfidence(), y.getConfidence() );
264 private int compare( final double a, final double b ) {
265 return a > b ? -1 : a > b ? 1 : 0;
/**
 * Expands the given prefixes into the sub-prefixes returned by
 * ForesterUtil.spliIntoPrefixes and builds one Prefix per distinct sub-prefix
 * with an accumulated confidence.
 * NOTE(review): the statement that seeds the map inside the first loop and the
 * return are not visible in this view; presumably each sub-prefix is inserted
 * with an initial confidence of 0 and "l" is returned -- confirm.
 *
 * @param greatest_common_prefixes the registered prefix hits
 * @param separator                the prefix separator
 */
270 private final static List<Prefix> obtainAllPrefixes( final List<Prefix> greatest_common_prefixes,
271 final String separator ) {
// TreeMap, so keys (and thus the resulting list) are in natural string order.
272 final SortedMap<String, Double> map = new TreeMap<>();
273 for( final Prefix prefix : greatest_common_prefixes ) {
274 final List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
275 for( final String p : prefixes ) {
// Add the confidence of every hit whose prefix string starts with the key.
279 for( final String key : map.keySet() ) {
280 for( final Prefix prefix : greatest_common_prefixes ) {
// NOTE(review): startsWith is not separator-aware; unless keys end with the
// separator, key "A.1" also collects confidence from "A.10..." -- verify.
281 if ( prefix.getPrefix().startsWith( key ) ) {
282 map.put( key, map.get( key ) + prefix.getConfidence() );
286 final List<Prefix> l = new ArrayList<>();
287 for( final Entry<String, Double> entry : map.entrySet() ) {
288 l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) );
/**
 * Renders the analysis results as text in sections titled "Cleaned:",
 * "Collapsed:" and, when specifics exist, "Specifics:" and
 * "Collapsed with specifics:".
 * NOTE(review): the statements that append each prefix inside the loops are not
 * visible in this view.
 * NOTE(review): _all and _collapsed are null until analyzeGreatestCommonPrefixes()
 * has run, so calling this method earlier throws a NullPointerException in the
 * first loop.
 */
293 public final String toString() {
294 final StringBuilder sb = new StringBuilder();
295 //TODO add all other stuff
296 sb.append( "Cleaned:" );
297 sb.append( ForesterUtil.LINE_SEPARATOR );
298 for( final Prefix prefix : _all ) {
300 sb.append( ForesterUtil.LINE_SEPARATOR );
302 sb.append( ForesterUtil.LINE_SEPARATOR );
303 sb.append( "Collapsed:" );
304 sb.append( ForesterUtil.LINE_SEPARATOR );
305 for( final Prefix prefix : _collapsed ) {
307 sb.append( ForesterUtil.LINE_SEPARATOR );
// The two sections below are emitted only when the analysis found specific prefixes.
309 if ( _has_specifics ) {
310 sb.append( ForesterUtil.LINE_SEPARATOR );
311 sb.append( "Specifics:" );
312 sb.append( ForesterUtil.LINE_SEPARATOR );
313 for( final Prefix prefix : _cleaned_spec ) {
315 sb.append( ForesterUtil.LINE_SEPARATOR );
317 sb.append( ForesterUtil.LINE_SEPARATOR );
318 sb.append( "Collapsed with specifics:" );
319 sb.append( ForesterUtil.LINE_SEPARATOR );
320 for( final Prefix prefix : _collapsed ) {
322 sb.append( ForesterUtil.LINE_SEPARATOR );
// List, below each collapsed prefix, the specific prefixes that extend it.
323 for( final Prefix spec : _cleaned_spec ) {
324 if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
325 sb.append( " " + spec );
326 sb.append( ForesterUtil.LINE_SEPARATOR );
331 return sb.toString();