From: cmzmasek Date: Mon, 12 Jun 2017 22:45:38 +0000 (-0700) Subject: analyze pplacer output... X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=bbe277fd373c572f60d13543087244d3dd58128f;p=jalview.git analyze pplacer output... --- diff --git a/forester/java/src/org/forester/application/pplacer_summary.java b/forester/java/src/org/forester/application/pplacer_summary.java new file mode 100644 index 0000000..91a488a --- /dev/null +++ b/forester/java/src/org/forester/application/pplacer_summary.java @@ -0,0 +1,147 @@ + +package org.forester.application; + +import java.io.File; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.util.BasicDescriptiveStatistics; + +public class pplacer_summary { + + public static void main( final String args[] ) { + final File indir = new File( "." ); + final File[] list_of_files = indir.listFiles(); + final List infiles = new ArrayList<>(); + for( final File file : list_of_files ) { + if ( file.isFile() && file.canRead() && file.toString().endsWith( ".sing.tre" ) ) { + infiles.add( file ); + } + } + Collections.sort( infiles ); + final BasicDescriptiveStatistics non_unique_placements_stats = new BasicDescriptiveStatistics(); + final BasicDescriptiveStatistics unexpected_top_placements_stats = new BasicDescriptiveStatistics(); + final BasicDescriptiveStatistics unexpected_unique_top_placements_stats = new BasicDescriptiveStatistics(); + for( final File infile : infiles ) { + Phylogeny phys[] = null; + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + phys = factory.create( infile, + org.forester.io.parsers.util.ParserUtils + .createParserDependingOnFileType( infile, true ) ); + } + catch ( final Exception e ) { + System.out.println( "Could not read " + infile + ":" + e ); + System.exit( -1 ); + } + final Pattern p = Pattern.compile( "_Q_#\\d+_M=(.+)" ); + int total_trees = 0; + int total_placements = 0; + int total_expected_top_placements = 0; + int total_unexpected_top_placements = 0; + int total_unexpected_unique_top_placements = 0; + int unique_placements = 0; + int non_unique_placements = 0; + int m1_placements = 0; + int non_m1_placements = 0; + for( final Phylogeny phy : phys ) { + ++total_trees; + final List nodes = phy.getNodes( p ); + if ( nodes.isEmpty() ) { + System.out.println(); + System.out.println( "not found!" ); + System.exit( -1 ); + } + else { + total_placements++; + if ( nodes.size() == 1 ) { + unique_placements++; + } + else { + non_unique_placements++; + } + final PhylogenyNode n = nodes.get( 0 ); + if ( n.isExternal() && !n.isRoot() ) { + final Matcher m = p.matcher( n.getName() ); + if ( m.find() ) { + final double M = Double.parseDouble( m.group( 1 ) ); + if ( M > 0.999999 ) { + m1_placements++; + } + else { + non_m1_placements++; + } + } + else { + System.out.println(); + System.out.println( "no match!" ); + System.exit( -1 ); + } + String sib = null; + if ( n.getChildNodeIndex() == 0 ) { + sib = n.getParent().getChildNode2().getName(); + } + else if ( n.getChildNodeIndex() == 1 ) { + sib = n.getParent().getChildNode1().getName(); + } + else { + System.out.println(); + System.out.println( "more than two children!" ); + System.exit( -1 ); + } + // System.out.println( n.getName() + "->" + sib ); + if ( n.getName().startsWith( sib ) ) { + total_expected_top_placements++; + } + else { + total_unexpected_top_placements++; + if ( nodes.size() == 1 ) { + total_unexpected_unique_top_placements++; + } + } + } + } + } + System.out.println(); + System.out.println( infile.getName() ); + final Pattern pa = Pattern.compile( "(\\d+)-\\d+" ); + final Matcher m = pa.matcher( infile.getName() ); + if ( m.find() ) { + final int start = Integer.parseInt( m.group( 1 ) ); + // System.out.println( start + "\t" + // + ( ( ( double ) total_unexpected_top_placements ) / total_placements ) ); + } + System.out.println( "total trees" + "\t" + total_trees ); + System.out.println( "total placements" + "\t" + total_placements ); + System.out.println( "total expected top placements" + "\t" + total_expected_top_placements ); + System.out.println( "total un-expected top placements" + "\t" + total_unexpected_top_placements ); + System.out.println( "total un-expected unique placements" + "\t" + total_unexpected_unique_top_placements ); + System.out.println( "unique placements" + "\t" + unique_placements ); + System.out.println( "non unique placements" + "\t" + non_unique_placements ); + System.out.println( "m1 placements" + "\t" + m1_placements ); + System.out.println( "non m1 placements" + "\t" + non_m1_placements ); + non_unique_placements_stats.addValue( ( ( double ) non_unique_placements ) / total_placements ); + unexpected_top_placements_stats + .addValue( ( ( double ) total_unexpected_top_placements ) / total_placements ); + unexpected_unique_top_placements_stats + .addValue( ( ( double ) total_unexpected_unique_top_placements ) / total_placements ); + } + System.out.println( "Non-unique placements: Mean\t" + non_unique_placements_stats.arithmeticMean() ); + System.out.println( "Non-unique placements: Min\t" + non_unique_placements_stats.getMin() ); + System.out.println( "Non-unique placements: Max\t" + non_unique_placements_stats.getMax() ); + System.out.println( "Unexpected top-placements: Mean\t" + unexpected_top_placements_stats.arithmeticMean() ); + System.out.println( "Unexpected top-placements: Min\t" + unexpected_top_placements_stats.getMin() ); + System.out.println( "Unexpected top-placements: Max\t" + unexpected_top_placements_stats.getMax() ); + System.out.println( "Unexpected unique placements: Mean\t" + + unexpected_unique_top_placements_stats.arithmeticMean() ); + System.out.println( "Unexpected unique placements: Min\t" + unexpected_unique_top_placements_stats.getMin() ); + System.out.println( "Unexpected unique placements: Max\t" + unexpected_unique_top_placements_stats.getMax() ); + } +}