From c3a4da0b4a3bd5bfb06acec6717f6b2e138eb480 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Tue, 22 Aug 2017 12:08:12 -0700 Subject: [PATCH] in progress.... --- .../src/org/forester/clade_analysis/Analysis2.java | 117 ++++++++-- .../forester/clade_analysis/CladeAnalysisTest.java | 12 +- .../src/org/forester/clade_analysis/Result2.java | 227 ++++++++++++++++---- .../java/src/org/forester/util/ForesterUtil.java | 2 +- 4 files changed, 299 insertions(+), 59 deletions(-) diff --git a/forester/java/src/org/forester/clade_analysis/Analysis2.java b/forester/java/src/org/forester/clade_analysis/Analysis2.java index f3b8cae..f565b55 100644 --- a/forester/java/src/org/forester/clade_analysis/Analysis2.java +++ b/forester/java/src/org/forester/clade_analysis/Analysis2.java @@ -76,10 +76,10 @@ public final class Analysis2 { qnode_ext_nodes_names.add( name ); } } - final int lec_ext_nodes = qnode_ext_nodes_names.size(); - final int p_ext_nodes = p.getNumberOfExternalNodes() - 1; + //final int lec_ext_nodes = qnode_ext_nodes_names.size(); + //final int p_ext_nodes = p.getNumberOfExternalNodes() - 1; final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator ); - System.out.println( greatest_common_prefix ); + // System.out.println( greatest_common_prefix ); Matcher matcher = query.matcher( qnode.getName() ); String conf_str = null; if ( matcher.find() ) { @@ -88,8 +88,6 @@ public final class Analysis2 { else { throw new IllegalStateException( "pattern did not match -- this should have never happened!" ); } - res.setLeastEncompassingCladeSize( lec_ext_nodes ); - res.setTreeSize( p_ext_nodes ); final double conf = Double.parseDouble( conf_str ); if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) { res.addGreatestCommonPrefix( greatest_common_prefix, conf ); @@ -97,6 +95,35 @@ public final class Analysis2 { else { res.addGreatestCommonPrefix( "?", conf ); } + //final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator, query, res ); + final String greatest_common_prefix_up = analyzeSiblings( qnode_p, qnode_pp, separator, query ); + System.out.println( "greatest_common_prefix_up=" + greatest_common_prefix_up + " " + conf); + if ( !ForesterUtil.isEmpty( greatest_common_prefix_up) ) { + res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf ); + } + else { + res.addGreatestCommonPrefixUp( "?", conf ); + } + // res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf ); + //res.addGreatestCommonPrefixUp( greatest_common_prefix_up[ 0 ], 0.1 ); + // res.setGreatestCommonPrefixUp( greatest_common_prefix_up[ 0 ] ); + //if ( greatest_common_prefix_up[ 1 ] != null ) { + // res.setGreatestCommonCladeUpSubtreeConfidence( greatest_common_prefix_up[ 1 ] ); + // } + // final String greatest_common_prefix_down[] = analyzeSiblings( qnode, qnode_p, separator,query, res ); + final String greatest_common_prefix_down = analyzeSiblings( qnode, qnode_p, separator, query ); + System.out.println( "greatest_common_prefix_down=" + greatest_common_prefix_down+ " " + conf); + if ( !ForesterUtil.isEmpty( greatest_common_prefix_down) ) { + res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf ); + } + else { + res.addGreatestCommonPrefixDown( "?", conf ); + } + //res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf ); + // res.setGreatestCommonPrefixDown( greatest_common_prefix_down[ 0 ] ); + // if ( greatest_common_prefix_down[ 1 ] != null ) { + // res.setGreatestCommonCladeDownSubtreeConfidence( greatest_common_prefix_down[ 1 ] ); + // } } /* for( final PhylogenyNode qnode_ext_node : qnode_ext_nodes ) { String name = qnode_ext_node.getName(); @@ -136,13 +163,16 @@ public final class Analysis2 { return res; } - private final static String[] analyzeSiblings( final PhylogenyNode child, - final PhylogenyNode parent, - final String separator ) { + private final static void analyzeSiblingsOLD( final PhylogenyNode child, + final PhylogenyNode parent, + final String separator, + final Pattern query, + Result2 res, + double conf2 ) { final int child_index = child.getChildNodeIndex(); final List ext_nodes_names = new ArrayList<>(); final List descs = parent.getDescendants(); - String conf = null; + // String conf = null; for( int i = 0; i < descs.size(); ++i ) { if ( i != child_index ) { final PhylogenyNode d = descs.get( i ); @@ -153,13 +183,76 @@ public final class Analysis2 { } ext_nodes_names.add( name.trim() ); } - if ( descs.size() == 2 ) { - conf = obtainConfidence( d ); + // if ( descs.size() == 2 ) { + // conf = obtainConfidence( d ); + // } + } + } + //////////////////////////////////////////////////////////// + /* Matcher matcher = query.matcher( child.getName() ); + String conf_str = null; + if ( matcher.find() ) { + conf_str = matcher.group( 1 ); + } + else { + throw new IllegalStateException( "pattern did not match for \"" + child.getName() + "\" -- this should have never happened!" ); + }*/ + //////////////////////////////////////////////////////////// + final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator ); + //final double conf = Double.parseDouble( conf_str ); + if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) { + res.addGreatestCommonPrefix( greatest_common_prefix, conf2 ); + } + else { + res.addGreatestCommonPrefix( "?", conf2 ); + } + } + + private final static String analyzeSiblings( final PhylogenyNode child, + final PhylogenyNode parent, + final String separator, + final Pattern query) { + final int child_index = child.getChildNodeIndex(); + final List ext_nodes_names = new ArrayList<>(); + final List descs = parent.getDescendants(); + // String conf = null; + for( int i = 0; i < descs.size(); ++i ) { + if ( i != child_index ) { + final PhylogenyNode d = descs.get( i ); + for( final PhylogenyNode n : d.getAllExternalDescendants() ) { + final String name = n.getName(); + if ( ForesterUtil.isEmptyTrimmed( name ) ) { + throw new IllegalArgumentException( "external node(s) with empty names found" ); + } + + //// + + final Matcher m = query.matcher( name ); + if ( !m.find() ) { + ext_nodes_names.add( name ); + } + + //// + } + // if ( descs.size() == 2 ) { + // conf = obtainConfidence( d ); + // } } } + //////////////////////////////////////////////////////////// + /* Matcher matcher = query.matcher( child.getName() ); + String conf_str = null; + if ( matcher.find() ) { + conf_str = matcher.group( 1 ); + } + else { + throw new IllegalStateException( "pattern did not match for \"" + child.getName() + "\" -- this should have never happened!" ); + }*/ + //////////////////////////////////////////////////////////// final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator ); - return new String[] { greatest_common_prefix, conf }; + //final double conf = Double.parseDouble( conf_str ); + return greatest_common_prefix; } private final static String obtainConfidence( final PhylogenyNode n ) { diff --git a/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java b/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java index 5a5afea..f5a9d92 100644 --- a/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java +++ b/forester/java/src/org/forester/clade_analysis/CladeAnalysisTest.java @@ -19,7 +19,7 @@ public class CladeAnalysisTest { public static void main( final String[] args ) { boolean failed = false; - if ( !testCladeAnalysis1() ) { + /* if ( !testCladeAnalysis1() ) { System.out.println( "Clade analysis 1 failed" ); failed = true; } @@ -34,15 +34,15 @@ public class CladeAnalysisTest { if ( !testCladeAnalysis4() ) { System.out.println( "Clade analysis 4 failed" ); failed = true; - } + */ if ( !testCladeAnalysis5() ) { System.out.println( "Clade analysis 5 failed" ); failed = true; } - if ( !testCladeAnalysis6() ) { + /* if ( !testCladeAnalysis6() ) { System.out.println( "Clade analysis 6 failed" ); failed = true; - } + }*/ if ( !failed ) { System.out.println( "OK" ); } @@ -620,7 +620,9 @@ public class CladeAnalysisTest { res1.addGreatestCommonPrefix( "A.1.2", 0.3 ); res1.addGreatestCommonPrefix( "A.1.3", 0.3 ); res1.addGreatestCommonPrefix( "B.1", 0.1 ); - res1.analyzeGreatestCommonPrefixes( 0.3 ); + res1.analyzeGreatestCommonPrefixes( 0.5 ); + //res1.analyzeGreatestCommonPrefixesDown(0.5); + //res1.analyzeGreatestCommonPrefixesUp(0.5); System.out.print( res1.toString()); System.out.println( "------------------------- "); System.out.println(); diff --git a/forester/java/src/org/forester/clade_analysis/Result2.java b/forester/java/src/org/forester/clade_analysis/Result2.java index 1cfc3ff..0135bec 100644 --- a/forester/java/src/org/forester/clade_analysis/Result2.java +++ b/forester/java/src/org/forester/clade_analysis/Result2.java @@ -40,19 +40,25 @@ import org.forester.util.ForesterUtil; public final class Result2 { private final String _separator; - private final List _greatest_common_prefixes = new ArrayList<>(); - private String _greatest_common_prefix_up = ""; - private String _greatest_common_prefix_down = ""; + private final List _greatest_common_prefixes = new ArrayList(); + private List _greatest_common_prefixes_up = new ArrayList(); + private List _greatest_common_prefixes_down = new ArrayList(); private final List _warnings = new ArrayList<>(); - private int _lec_ext_nodes = 0; - private int _p_ext_nodes = 0; private String _greatest_common_clade_subtree_confidence = ""; private String _greatest_common_clade_subtree_confidence_up = ""; private String _greatest_common_clade_subtree_confidence_down = ""; private List _all = null; private List _collapsed = null; private List _cleaned_spec = null; - private boolean _has_specifics; + private boolean _has_specifics = false; + private List _all_up = null; + private List _collapsed_up = null; + private List _cleaned_spec_up = null; + private boolean _has_specifics_up = false; + private List _all_down = null; + private List _collapsed_down = null; + private List _cleaned_spec_down = null; + private boolean _has_specifics_down = false; public Result2( final String separator ) { _separator = separator; @@ -62,23 +68,54 @@ public final class Result2 { _separator = ".";//TODO make const somewhere } + public List getAllMultiHitPrefixesUp() { + return _all_up; + } + + public List getCollapsedMultiHitPrefixesUp() { + return _collapsed_up; + } + + public List getSpecificMultiHitPrefixesUp() { + return _cleaned_spec_up; + } + + public boolean isHasSpecificMultiHitsPrefixesUp() { + return _has_specifics_up; + } + + public List getAllMultiHitPrefixesDown() { + return _all_down; + } + + public List getCollapsedMultiHitPrefixesDown() { + return _collapsed_down; + } + + public List getSpecificMultiHitPrefixesDown() { + return _cleaned_spec_down; + } + + public boolean isHasSpecificMultiHitsPrefixesDown() { + return _has_specifics_down; + } + public List getAllMultiHitPrefixes() { return _all; } - + public List getCollapsedMultiHitPrefixes() { return _collapsed; } - + public List getSpecificMultiHitPrefixes() { return _cleaned_spec; } - + public boolean isHasSpecificMultiHitsPrefixes() { return _has_specifics; } - - + void addWarning( final String warning ) { _warnings.add( warning ); } @@ -87,12 +124,12 @@ public final class Result2 { _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) ); } - void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) { - _greatest_common_prefix_up = greatest_common_prefix_up; + void addGreatestCommonPrefixUp( final String prefix_up, final double confidence ) { + _greatest_common_prefixes_up.add( new Prefix( prefix_up, confidence, _separator ) ); } - void setGreatestCommonPrefixDown( final String greatest_common_prefix_down ) { - _greatest_common_prefix_down = greatest_common_prefix_down; + void addGreatestCommonPrefixDown( final String prefix_down, final double confidence ) { + _greatest_common_prefixes_down.add( new Prefix( prefix_down, confidence, _separator ) ); } void setGreatestCommonCladeSubtreeConfidence( final String greatest_common_clade_confidence ) { @@ -107,17 +144,6 @@ public final class Result2 { _greatest_common_clade_subtree_confidence_down = greatest_common_clade_confidence_down; } - // public String getGreatestCommonPrefix() { - // return _greatest_common_prefix; - // } - public String getGreatestCommonPrefixUp() { - return _greatest_common_prefix_up; - } - - public String getGreatestCommonPrefixDown() { - return _greatest_common_prefix_down; - } - public String getGreatestCommonCladeSubtreeConfidence() { return _greatest_common_clade_subtree_confidence; } @@ -134,28 +160,30 @@ public final class Result2 { return _warnings; } - void setLeastEncompassingCladeSize( final int lec_ext_nodes ) { - _lec_ext_nodes = lec_ext_nodes; + public void analyzeGreatestCommonPrefixes( final double cutoff_for_specifics ) { + analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff_for_specifics ); + analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, cutoff_for_specifics ); + analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, cutoff_for_specifics ); } - void setTreeSize( final int p_ext_nodes ) { - _p_ext_nodes = p_ext_nodes; + public void analyzeGreatestCommonPrefixes() { + analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, -1 ); } - public int getLeastEncompassingCladeSize() { - return _lec_ext_nodes; + private void analyzeGreatestCommonPrefixesUp( final double cutoff_for_specifics ) { + analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, cutoff_for_specifics ); } - public int getTreeSize() { - return _p_ext_nodes; + private void analyzeGreatestCommonPrefixesUp() { + analyzeGreatestCommonPrefixesUp( _greatest_common_prefixes_up, _separator, -1 ); } - public void analyzeGreatestCommonPrefixes( final double cutoff ) { - analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, cutoff ); + private void analyzeGreatestCommonPrefixesDown( final double cutoff_for_specifics ) { + analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, cutoff_for_specifics ); } - public void analyzeGreatestCommonPrefixes() { - analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator, -1 ); + private void analyzeGreatestCommonPrefixesDown() { + analyzeGreatestCommonPrefixesDown( _greatest_common_prefixes_down, _separator, -1 ); } private final void analyzeGreatestCommonPrefixes( final List greatest_common_prefixes, @@ -177,6 +205,44 @@ public final class Result2 { } } + private final void analyzeGreatestCommonPrefixesUp( final List greatest_common_prefixes_up, + final String separator, + final double cutoff ) { + final List l = obtainAllPrefixes( greatest_common_prefixes_up, separator ); + sortPrefixesAccordingToConfidence( l ); + _all_up = removeLessSpecificPrefixes( l ); + _collapsed_up = collapse( _all_up ); + _has_specifics_up = false; + if ( cutoff >= 0 ) { + _cleaned_spec_up = obtainSpecifics( cutoff, _all_up, _collapsed_up ); + if ( _cleaned_spec_up != null && _cleaned_spec_up.size() > 0 ) { + _has_specifics_up = true; + } + } + else { + _cleaned_spec_up = null; + } + } + + private final void analyzeGreatestCommonPrefixesDown( final List greatest_common_prefixes_down, + final String separator, + final double cutoff ) { + final List l = obtainAllPrefixes( greatest_common_prefixes_down, separator ); + sortPrefixesAccordingToConfidence( l ); + _all_down = removeLessSpecificPrefixes( l ); + _collapsed_down = collapse( _all_down ); + _has_specifics_down = false; + if ( cutoff >= 0 ) { + _cleaned_spec_down = obtainSpecifics( cutoff, _all_down, _collapsed_down ); + if ( _cleaned_spec_down != null && _cleaned_spec_down.size() > 0 ) { + _has_specifics_down = true; + } + } + else { + _cleaned_spec_down = null; + } + } + private final static List obtainSpecifics( final double cutoff, final List cleaned, final List collapsed ) { @@ -219,7 +285,7 @@ public final class Result2 { } } if ( !ForesterUtil.isEqual( confidence_sum, 1.0, 1E-5 ) ) { - throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" ); + // throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" ); } return collapsed; } @@ -253,7 +319,7 @@ public final class Result2 { return cleaned; } - private static void sortPrefixesAccordingToConfidence( final List l ) { + private final static void sortPrefixesAccordingToConfidence( final List l ) { Collections.sort( l, new Comparator() { @Override @@ -315,7 +381,7 @@ public final class Result2 { sb.append( ForesterUtil.LINE_SEPARATOR ); } sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Collapsed with specifics:" ); + sb.append( "Collapsed With Specifics:" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed ) { sb.append( prefix ); @@ -328,6 +394,85 @@ public final class Result2 { } } } + ////// + if ( _all_down != null ) { + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Cleaned Down:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _all_down ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Collapsed Down:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _collapsed_down ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + if ( _has_specifics_down ) { + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Specifics Down:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _cleaned_spec_down ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Collapsed With Specifics Down:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _collapsed_down ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix spec : _cleaned_spec_down ) { + if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) { + sb.append( " " + spec ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + } + } + } + } + ////// + if ( _all_up != null ) { + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Cleaned Up:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _all_up ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Collapsed Up:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _collapsed_up ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + if ( _has_specifics ) { + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Specifics Up:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _cleaned_spec_up ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + sb.append( ForesterUtil.LINE_SEPARATOR ); + sb.append( "Collapsed With Specifics Up:" ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix prefix : _collapsed_up ) { + sb.append( prefix ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( final Prefix spec : _cleaned_spec_up ) { + if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) { + sb.append( " " + spec ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + } + } + } + } + ///// return sb.toString(); } } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index e453dad..c254c8b 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -1675,7 +1675,7 @@ public final class ForesterUtil { sb.append( separator ); } } - //System.out.println( sb.toString() ); + // System.out.println( sb.toString() ); l.add( sb.toString()); } return l; -- 1.7.10.2