analyze pplacer output...
[jalview.git] / forester / java / src / org / forester / application / pplacer_summary.java
1
2 package org.forester.application;
3
4 import java.io.File;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.List;
8 import java.util.regex.Matcher;
9 import java.util.regex.Pattern;
10
11 import org.forester.phylogeny.Phylogeny;
12 import org.forester.phylogeny.PhylogenyNode;
13 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
14 import org.forester.phylogeny.factories.PhylogenyFactory;
15 import org.forester.util.BasicDescriptiveStatistics;
16
17 public class pplacer_summary {
18
19     public static void main( final String args[] ) {
20         final File indir = new File( "." );
21         final File[] list_of_files = indir.listFiles();
22         final List<File> infiles = new ArrayList<>();
23         for( final File file : list_of_files ) {
24             if ( file.isFile() && file.canRead() && file.toString().endsWith( ".sing.tre" ) ) {
25                 infiles.add( file );
26             }
27         }
28         Collections.sort( infiles );
29         final BasicDescriptiveStatistics non_unique_placements_stats = new BasicDescriptiveStatistics();
30         final BasicDescriptiveStatistics unexpected_top_placements_stats = new BasicDescriptiveStatistics();
31         final BasicDescriptiveStatistics unexpected_unique_top_placements_stats = new BasicDescriptiveStatistics();
32         for( final File infile : infiles ) {
33             Phylogeny phys[] = null;
34             try {
35                 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
36                 phys = factory.create( infile,
37                                        org.forester.io.parsers.util.ParserUtils
38                                                .createParserDependingOnFileType( infile, true ) );
39             }
40             catch ( final Exception e ) {
41                 System.out.println( "Could not read " + infile + ":" + e );
42                 System.exit( -1 );
43             }
44             final Pattern p = Pattern.compile( "_Q_#\\d+_M=(.+)" );
45             int total_trees = 0;
46             int total_placements = 0;
47             int total_expected_top_placements = 0;
48             int total_unexpected_top_placements = 0;
49             int total_unexpected_unique_top_placements = 0;
50             int unique_placements = 0;
51             int non_unique_placements = 0;
52             int m1_placements = 0;
53             int non_m1_placements = 0;
54             for( final Phylogeny phy : phys ) {
55                 ++total_trees;
56                 final List<PhylogenyNode> nodes = phy.getNodes( p );
57                 if ( nodes.isEmpty() ) {
58                     System.out.println();
59                     System.out.println( "not found!" );
60                     System.exit( -1 );
61                 }
62                 else {
63                     total_placements++;
64                     if ( nodes.size() == 1 ) {
65                         unique_placements++;
66                     }
67                     else {
68                         non_unique_placements++;
69                     }
70                     final PhylogenyNode n = nodes.get( 0 );
71                     if ( n.isExternal() && !n.isRoot() ) {
72                         final Matcher m = p.matcher( n.getName() );
73                         if ( m.find() ) {
74                             final double M = Double.parseDouble( m.group( 1 ) );
75                             if ( M > 0.999999 ) {
76                                 m1_placements++;
77                             }
78                             else {
79                                 non_m1_placements++;
80                             }
81                         }
82                         else {
83                             System.out.println();
84                             System.out.println( "no match!" );
85                             System.exit( -1 );
86                         }
87                         String sib = null;
88                         if ( n.getChildNodeIndex() == 0 ) {
89                             sib = n.getParent().getChildNode2().getName();
90                         }
91                         else if ( n.getChildNodeIndex() == 1 ) {
92                             sib = n.getParent().getChildNode1().getName();
93                         }
94                         else {
95                             System.out.println();
96                             System.out.println( "more than two children!" );
97                             System.exit( -1 );
98                         }
99                         //  System.out.println( n.getName() + "->" + sib );
100                         if ( n.getName().startsWith( sib ) ) {
101                             total_expected_top_placements++;
102                         }
103                         else {
104                             total_unexpected_top_placements++;
105                             if ( nodes.size() == 1 ) {
106                                 total_unexpected_unique_top_placements++;
107                             }
108                         }
109                     }
110                 }
111             }
112             System.out.println();
113             System.out.println( infile.getName() );
114             final Pattern pa = Pattern.compile( "(\\d+)-\\d+" );
115             final Matcher m = pa.matcher( infile.getName() );
116             if ( m.find() ) {
117                 final int start = Integer.parseInt( m.group( 1 ) );
118                 //      System.out.println( start + "\t"
119                 //              + ( ( ( double ) total_unexpected_top_placements ) / total_placements ) );
120             }
121             System.out.println( "total trees" + "\t" + total_trees );
122             System.out.println( "total placements" + "\t" + total_placements );
123             System.out.println( "total expected top placements" + "\t" + total_expected_top_placements );
124             System.out.println( "total un-expected top placements" + "\t" + total_unexpected_top_placements );
125             System.out.println( "total un-expected unique placements" + "\t" + total_unexpected_unique_top_placements );
126             System.out.println( "unique placements" + "\t" + unique_placements );
127             System.out.println( "non unique placements" + "\t" + non_unique_placements );
128             System.out.println( "m1 placements" + "\t" + m1_placements );
129             System.out.println( "non m1 placements" + "\t" + non_m1_placements );
130             non_unique_placements_stats.addValue( ( ( double ) non_unique_placements ) / total_placements );
131             unexpected_top_placements_stats
132                     .addValue( ( ( double ) total_unexpected_top_placements ) / total_placements );
133             unexpected_unique_top_placements_stats
134                     .addValue( ( ( double ) total_unexpected_unique_top_placements ) / total_placements );
135         }
136         System.out.println( "Non-unique placements: Mean\t" + non_unique_placements_stats.arithmeticMean() );
137         System.out.println( "Non-unique placements: Min\t" + non_unique_placements_stats.getMin() );
138         System.out.println( "Non-unique placements: Max\t" + non_unique_placements_stats.getMax() );
139         System.out.println( "Unexpected top-placements: Mean\t" + unexpected_top_placements_stats.arithmeticMean() );
140         System.out.println( "Unexpected top-placements: Min\t" + unexpected_top_placements_stats.getMin() );
141         System.out.println( "Unexpected top-placements: Max\t" + unexpected_top_placements_stats.getMax() );
142         System.out.println( "Unexpected unique placements: Mean\t"
143                 + unexpected_unique_top_placements_stats.arithmeticMean() );
144         System.out.println( "Unexpected unique placements: Min\t" + unexpected_unique_top_placements_stats.getMin() );
145         System.out.println( "Unexpected unique placements: Max\t" + unexpected_unique_top_placements_stats.getMax() );
146     }
147 }