in progress...
[jalview.git] / forester_applications / src / org / forester / applications / genome_counts_for_once_appearing_dcs.java
1
2 package org.forester.applications;
3
4 // $Id:
5 // FORESTER -- software libraries and applications
6 // for evolutionary biology research and applications.
7 //
8 // Copyright (C) 2008-2011 Christian M. Zmasek
9 // Copyright (C) 2008-2011 Burnham Institute for Medical Research
10 // All rights reserved
11 //
12 // This library is free software; you can redistribute it and/or
13 // modify it under the terms of the GNU Lesser General Public
14 // License as published by the Free Software Foundation; either
15 // version 2.1 of the License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 //
26 // Contact: phylosoft @ gmail . com
27 // WWW: www.phylosoft.org/forester
28 // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
29 // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/genome_counts_for_once_appearing_dcs.java
30 // java -Xmx2048m -cp
31 // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
32 // org.forester.applications.genome_counts_for_once_appearing_dcs
33 import java.io.File;
34 import java.util.HashMap;
35 import java.util.List;
36 import java.util.Map;
37 import java.util.Set;
38 import java.util.SortedSet;
39 import java.util.TreeSet;
40
41 import org.forester.phylogeny.Phylogeny;
42 import org.forester.phylogeny.PhylogenyNode;
43 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
44 import org.forester.phylogeny.factories.PhylogenyFactory;
45 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
46 import org.forester.util.BasicDescriptiveStatistics;
47 import org.forester.util.DescriptiveStatistics;
48
49 public class genome_counts_for_once_appearing_dcs {
50
51     public static void main( final String args[] ) {
52         if ( args.length != 1 ) {
53             System.err.println();
54             System.err.println( "genome_counts_for_once_appearing_dcs: wrong number of arguments" );
55             System.err.println( "Usage: \"genome_counts_for_once_appearing_dcs <intree>" );
56             System.err.println();
57             System.exit( -1 );
58         }
59         final File infile = new File( args[ 0 ] );
60         Phylogeny phy = null;
61         try {
62             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
63             phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils
64                                   .createParserDependingOnFileType( infile, true ) )[ 0 ];
65         }
66         catch ( final Exception e ) {
67             System.err.println( e + "\nCould not read " + infile + "\n" );
68             System.exit( -1 );
69         }
70         final SortedSet<String> all_dcs = getAllExternalPresentAndGainedCharacters( phy.getRoot() );
71         final SortedSet<String> appearing_once_dcs = new TreeSet<String>();
72         System.out.println( "All DCs: " + all_dcs.size() );
73         for( final String dc : all_dcs ) {
74             int reappearing_count = 0;
75             for( final PhylogenyNodeIterator it = phy.iteratorPreorder(); it.hasNext(); ) {
76                 final PhylogenyNode n = it.next();
77                 SortedSet<String> n_gained_dcs = null;
78                 if ( n.isRoot() ) {
79                     n_gained_dcs = n.getNodeData().getBinaryCharacters().getPresentCharacters();
80                 }
81                 else {
82                     n_gained_dcs = n.getNodeData().getBinaryCharacters().getGainedCharacters();
83                 }
84                 if ( n_gained_dcs.contains( dc ) ) {
85                     reappearing_count++;
86                 }
87             }
88             if ( reappearing_count < 1 ) {
89                 System.out.println( "error: " + dc );
90                 System.exit( -1 );
91             }
92             if ( reappearing_count == 1 ) {
93                 appearing_once_dcs.add( dc );
94             }
95         }
96         System.out.println( "Appearing once DCs: " + appearing_once_dcs.size() );
97         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
98         final Map<String, Set<String>> node_to_chars = new HashMap<String, Set<String>>();
99         final SortedSet<String> appearing_in_all_dcs = new TreeSet<String>();
100         for( final String appearing_once_dc : appearing_once_dcs ) {
101             int count = 0;
102             for( final PhylogenyNodeIterator ite = phy.iteratorExternalForward(); ite.hasNext(); ) {
103                 final PhylogenyNode ext_node = ite.next();
104                 if ( !node_to_chars.containsKey( ext_node.getName() ) ) {
105                     node_to_chars.put( ext_node.getName(), getAllExternalPresentAndGainedCharacters( ext_node ) );
106                 }
107                 if ( node_to_chars.get( ext_node.getName() ).contains( appearing_once_dc ) ) {
108                     count++;
109                 }
110             }
111             if ( count < 1 ) {
112                 System.out.println( "error, count is <1" );
113                 System.exit( -1 );
114             }
115             if ( count == phy.getNumberOfExternalNodes() ) {
116                 appearing_in_all_dcs.add( appearing_once_dc );
117             }
118             stats.addValue( count );
119         }
120         System.out.println();
121         System.out.println( stats.toString() );
122         System.out.println();
123         final int[] bins = BasicDescriptiveStatistics.performBinning( stats.getDataAsDoubleArray(), 1, 172, 172 );
124         for( int i = 0; i < bins.length; i++ ) {
125             System.out.println( ( i + 1 ) + "\t" + bins[ i ] );
126         }
127         System.out.println();
128         System.out.println( "appearing in all:" );
129         for( final String i : appearing_in_all_dcs ) {
130             System.out.println( i );
131         }
132         System.out.println();
133         for( final String dc : appearing_once_dcs ) {
134             System.out.println( "1\t" + dc );
135         }
136     }
137
138     private static SortedSet<String> getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) {
139         final SortedSet<String> chars = new TreeSet<String>();
140         final List<PhylogenyNode> descs = node.getAllExternalDescendants();
141         for( final PhylogenyNode desc : descs ) {
142             chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() );
143             chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() );
144         }
145         return chars;
146     }
147 }