// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2017 Christian M. Zmasek // Copyright (C) 2017 J. Craig Venter Institute // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phyloxml @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.clade_analysis; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import org.forester.util.ForesterUtil; import org.forester.util.UserException; public final class ResultMulti { private final static double MIN_DIFF = 1E-5; private final String _separator; private final List _greatest_common_prefixes = new ArrayList<>(); private final List _greatest_common_prefixes_up = new ArrayList<>(); private final List _greatest_common_prefixes_down = new ArrayList<>(); private List _all = null; private List _collapsed = null; private List _cleaned_spec = null; private boolean _has_specifics = false; private List _all_up = null; private List _collapsed_up = null; private List _cleaned_spec_up = null; private boolean _has_specifics_up = false; private List _all_down = null; private List _collapsed_down = null; private List _cleaned_spec_down = null; private boolean _has_specifics_down = false; private int _matches = 0; private int _ref_tree_ext_nodes = 0; private String _query_name_prefix = ""; ResultMulti( final String separator ) { if ( ForesterUtil.isEmpty( separator ) ) { throw new IllegalArgumentException( "separator must not be null or empty" ); } _separator = separator; reset(); } ResultMulti() { _separator = AnalysisMulti.DEFAULT_SEPARATOR; reset(); } public List getAllMultiHitPrefixesUp() { return _all_up; } public List getCollapsedMultiHitPrefixesUp() { return _collapsed_up; } public List getSpecificMultiHitPrefixesUp() { return _cleaned_spec_up; } public boolean isHasSpecificMultiHitsPrefixesUp() { return _has_specifics_up; } public List getAllMultiHitPrefixesDown() { return _all_down; } public List getCollapsedMultiHitPrefixesDown() { return _collapsed_down; } public List getSpecificMultiHitPrefixesDown() { return _cleaned_spec_down; } public boolean isHasSpecificMultiHitsPrefixesDown() { return _has_specifics_down; } public List getAllMultiHitPrefixes() { return _all; } public List getCollapsedMultiHitPrefixes() { return _collapsed; } public List getSpecificMultiHitPrefixes() { return _cleaned_spec; } public boolean isHasSpecificMultiHitsPrefixes() { return _has_specifics; } public String getQueryNamePrefix() { return _query_name_prefix; } public int getNumberOfMatches() { return _matches; } public int getReferenceTreeNumberOfExternalNodes() { return _ref_tree_ext_nodes; } @Override public final String toString() { final StringBuilder sb = new StringBuilder(); sb.append( "Query: " ); sb.append( getQueryNamePrefix() ); sb.append( ForesterUtil.LINE_SEPARATOR ); sb.append( "Matching Clade(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed ) { sb.append( prefix ); sb.append( ForesterUtil.LINE_SEPARATOR ); } if ( _has_specifics ) { sb.append( ForesterUtil.LINE_SEPARATOR ); sb.append( "Specific-hit(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _cleaned_spec ) { sb.append( prefix ); sb.append( ForesterUtil.LINE_SEPARATOR ); } sb.append( ForesterUtil.LINE_SEPARATOR ); sb.append( "Matching Clade(s) with Specific-hit(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed ) { sb.append( prefix ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix spec : _cleaned_spec ) { if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) { sb.append( " " + spec ); sb.append( ForesterUtil.LINE_SEPARATOR ); } } } } if ( !ForesterUtil.isEmpty( _all_down ) ) { sb.append( ForesterUtil.LINE_SEPARATOR ); sb.append( "Matching Down-tree Bracketing Clade(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed_down ) { sb.append( prefix ); sb.append( ForesterUtil.LINE_SEPARATOR ); } } if ( !ForesterUtil.isEmpty( _all_up ) ) { sb.append( ForesterUtil.LINE_SEPARATOR ); sb.append( "Matching Up-tree Bracketing Clade(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed_up ) { sb.append( prefix ); sb.append( ForesterUtil.LINE_SEPARATOR ); } } sb.append( ForesterUtil.LINE_SEPARATOR ); sb.append( "Total Number of Matches: " + getNumberOfMatches() + "/" + getReferenceTreeNumberOfExternalNodes() ); sb.append( ForesterUtil.LINE_SEPARATOR ); return sb.toString(); } void addGreatestCommonPrefix( final String prefix, final double confidence ) { _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) ); } void addGreatestCommonPrefixUp( final String prefix_up, final double confidence ) { _greatest_common_prefixes_up.add( new Prefix( prefix_up, confidence, _separator ) ); } void addGreatestCommonPrefixDown( final String prefix_down, final double confidence ) { _greatest_common_prefixes_down.add( new Prefix( prefix_down, confidence, _separator ) ); } void setQueryNamePrefix( final String query_name_prefix ) { if ( !ForesterUtil.isEmpty( _query_name_prefix ) ) { throw new IllegalStateException( "illegal attempt to change the query name prefix" ); } _query_name_prefix = query_name_prefix; } void setTotalNumberOfMatches( final int matches ) { if ( _matches > 0 ) { throw new IllegalStateException( "illegal attempt to change the number of matches" ); } _matches = matches; } public void setReferenceTreeNumberOfExternalNodes( final int ext_nodes ) { if ( _ref_tree_ext_nodes > 0 ) { throw new IllegalStateException( "illegal attempt to change the number of external nodes" ); } _ref_tree_ext_nodes = ext_nodes; } final void analyze( final double cutoff_for_specifics ) throws UserException { reset(); analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff_for_specifics ); analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, cutoff_for_specifics ); analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, cutoff_for_specifics ); } private final void reset() { _all = new ArrayList<>(); _collapsed = new ArrayList<>(); _cleaned_spec = new ArrayList<>(); _has_specifics = false; _all_up = new ArrayList<>(); _collapsed_up = new ArrayList<>(); _cleaned_spec_up = new ArrayList<>(); _has_specifics_up = false; _all_down = new ArrayList<>(); _collapsed_down = new ArrayList<>(); _cleaned_spec_down = new ArrayList<>(); _has_specifics_down = false; } private final void analyzeGreatestCommonPrefixes( final List greatest_common_prefixes, final String separator, final double cutoff ) throws UserException { final List l = obtainAllPrefixes( greatest_common_prefixes, separator ); if ( !ForesterUtil.isEmpty( l ) ) { sortPrefixesAccordingToConfidence( l ); _all = removeLessSpecificPrefixes( l, separator ); _collapsed = collapse( _all ); _has_specifics = false; if ( cutoff >= 0 ) { _cleaned_spec = obtainSpecifics( cutoff, _all, _collapsed, separator ); if ( !ForesterUtil.isEmpty( _cleaned_spec ) ) { _has_specifics = true; } } } } private final void analyzeGreatestCommonPrefixesUp( final List greatest_common_prefixes_up, final String separator, final double cutoff ) throws UserException { final List l = obtainAllPrefixes( greatest_common_prefixes_up, separator ); if ( !ForesterUtil.isEmpty( l ) ) { sortPrefixesAccordingToConfidence( l ); _all_up = removeLessSpecificPrefixes( l, separator ); _collapsed_up = collapse( _all_up ); _has_specifics_up = false; if ( cutoff >= 0 ) { _cleaned_spec_up = obtainSpecifics( cutoff, _all_up, _collapsed_up, separator ); if ( !ForesterUtil.isEmpty( _cleaned_spec_up ) ) { _has_specifics_up = true; } } } } final void analyzeGreatestCommonPrefixesDown( final List greatest_common_prefixes_down, final String separator, final double cutoff ) throws UserException { final List l = obtainAllPrefixes( greatest_common_prefixes_down, separator ); if ( !ForesterUtil.isEmpty( l ) ) { sortPrefixesAccordingToConfidence( l ); _all_down = removeLessSpecificPrefixes( l, separator ); _collapsed_down = collapse( _all_down ); _has_specifics_down = false; if ( cutoff >= 0 ) { _cleaned_spec_down = obtainSpecifics( cutoff, _all_down, _collapsed_down, separator ); if ( !ForesterUtil.isEmpty( _cleaned_spec_down ) ) { _has_specifics_down = true; } } } } final static List obtainSpecifics( final double cutoff, final List cleaned, final List collapsed, final String separator ) { final List cleaned_spec = new ArrayList<>(); final Set collapsed_set = new HashSet<>(); for( final Prefix prefix : collapsed ) { collapsed_set.add( prefix.getPrefix() ); } final List spec = new ArrayList<>(); for( final Prefix prefix : cleaned ) { if ( ( prefix.getConfidence() >= cutoff ) && !collapsed_set.contains( prefix.getPrefix() ) ) { spec.add( prefix ); } } for( final Prefix o : spec ) { boolean ok = true; for( final Prefix i : spec ) { if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( ForesterUtil.isContainsPrefix( i.getPrefix(), o.getPrefix(), separator ) ) ) { ok = false; break; } /* if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) ) { ok = false; break; }*/ } if ( ok ) { cleaned_spec.add( o ); } } return cleaned_spec; } private final static List collapse( final List cleaned ) throws UserException { final List collapsed = new ArrayList<>(); final Set firsts = new HashSet<>(); double confidence_sum = 0; for( final Prefix prefix : cleaned ) { final String f = prefix.getPrefixFirstElement(); if ( !firsts.contains( f ) ) { firsts.add( f ); collapsed.add( prefix ); confidence_sum += prefix.getConfidence(); } } if ( !ForesterUtil.isEqual( confidence_sum, 1.0, MIN_DIFF ) ) { throw new UserException( "confidences add up to " + confidence_sum + " instead of 1.0" ); } return collapsed; } /* * This replaces (by way of example) * A.1.1 0.9 * A.1 0.9 * with * A.1.1 0.9 * * I.e. it removes less specific prefixes. * */ private final static List removeLessSpecificPrefixes( final List l, final String separator ) { final List cleaned = new ArrayList<>(); for( final Prefix o : l ) { boolean ok = true; for( final Prefix i : l ) { /*if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) ) && ForesterUtil.isEqual( i.getConfidence(), o.getConfidence() ) ) { ok = false; break; }*/ if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( ForesterUtil.isContainsPrefix( i.getPrefix(), o.getPrefix(), separator ) ) && ForesterUtil.isEqual( i.getConfidence(), o.getConfidence() ) ) { ok = false; break; } } if ( ok ) { cleaned.add( o ); } } return cleaned; } private final static void sortPrefixesAccordingToConfidence( final List l ) { Collections.sort( l, new Comparator() { @Override public int compare( final Prefix x, final Prefix y ) { return compare( x.getConfidence(), y.getConfidence() ); } private int compare( final double a, final double b ) { return a > b ? -1 : a > b ? 1 : 0; } } ); } private final static List obtainAllPrefixes( final List greatest_common_prefixes, final String separator ) { final SortedMap map = new TreeMap<>(); for( final Prefix prefix : greatest_common_prefixes ) { final List prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator ); for( final String p : prefixes ) { map.put( p, 0.0 ); } } for( final String key : map.keySet() ) { for( final Prefix prefix : greatest_common_prefixes ) { if ( ForesterUtil.isContainsPrefix( prefix.getPrefix(), key, separator ) ) { map.put( key, map.get( key ) + prefix.getConfidence() ); } } } final List l = new ArrayList<>(); for( final Entry entry : map.entrySet() ) { l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) ); } return l; } }