fixed issue with UTF8 encoding.
[jalview.git] / forester_applications / src / org / forester / applications / get_subtree_specific_chars.java
1
2 package org.forester.applications;
3
4 // $Id:
5 // FORESTER -- software libraries and applications
6 // for evolutionary biology research and applications.
7 //
8 // Copyright (C) 2008-2011 Christian M. Zmasek
9 // Copyright (C) 2008-2011 Burnham Institute for Medical Research
10 // All rights reserved
11 //
12 // This library is free software; you can redistribute it and/or
13 // modify it under the terms of the GNU Lesser General Public
14 // License as published by the Free Software Foundation; either
15 // version 2.1 of the License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 //
26 // Contact: phylosoft @ gmail . com
27 // WWW: www.phylosoft.org/forester
28 // javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
29 // ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/get_subtree_specific_chars.java
30 // java -Xmx2048m -cp
31 // /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
32 // org.forester.applications.get_subtree_specific_chars
33 import java.io.File;
34 import java.util.List;
35 import java.util.SortedSet;
36 import java.util.TreeSet;
37
38 import org.forester.phylogeny.Phylogeny;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
41 import org.forester.phylogeny.factories.PhylogenyFactory;
42 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
43 import org.forester.util.ForesterUtil;
44
45 public class get_subtree_specific_chars {
46
47     final static boolean SIMPLE = true;
48
49     public static void main( final String args[] ) {
50         if ( args.length != 1 ) {
51             System.err.println();
52             System.err.println( "get_subtree_specific_chars: wrong number of arguments" );
53             System.err.println( "Usage: \"get_subtree_specific_chars <intree>" );
54             System.err.println();
55             System.exit( -1 );
56         }
57         final File infile = new File( args[ 0 ] );
58         Phylogeny phy = null;
59         try {
60             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
61             phy = factory.create( infile, org.forester.io.parsers.util.ParserUtils
62                                   .createParserDependingOnFileType( infile, true ) )[ 0 ];
63         }
64         catch ( final Exception e ) {
65             System.err.println( e + "\nCould not read " + infile + "\n" );
66             System.exit( -1 );
67         }
68         final SortedSet<Long> all_external_ids = getAllExternalDescendantsNodeIds( phy.getRoot() );
69         final SortedSet<String> all_chars = getAllExternalPresentAndGainedCharacters( phy.getRoot() );
70         System.out.println( "Sum of all external characters:\t" + all_chars.size() );
71         System.out.println();
72         for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
73             final PhylogenyNode node = iter.next();
74             if ( !SIMPLE && node.isExternal() ) {
75                 continue;
76             }
77             if ( !node.isRoot() ) {
78                 // System.out.println();
79                 if ( node.getNodeData().isHasTaxonomy()
80                         && !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getScientificName() ) ) {
81                     System.out.print( node.getNodeData().getTaxonomy().getScientificName() );
82                 }
83                 else {
84                     System.out.print( node.getName() );
85                 }
86                 // System.out.println( ":" );
87                 System.out.print( "\t" );
88                 final SortedSet<Long> external_ids = getAllExternalDescendantsNodeIds( node );
89                 final SortedSet<Long> not_external_ids = copy( all_external_ids );
90                 not_external_ids.removeAll( external_ids );
91                 final SortedSet<String> not_node_chars = new TreeSet<String>();
92                 for( final Long id : not_external_ids ) {
93                     not_node_chars.addAll( getAllExternalPresentAndGainedCharacters( phy.getNode( id ) ) );
94                 }
95                 final SortedSet<String> node_chars = getAllExternalPresentAndGainedCharacters( node );
96                 final SortedSet<String> unique_chars = new TreeSet<String>();
97                 for( final String node_char : node_chars ) {
98                     if ( !not_node_chars.contains( node_char ) ) {
99                         if ( SIMPLE ) {
100                             unique_chars.add( node_char );
101                         }
102                         else {
103                             boolean found = true;
104                             for( final Long external_id : external_ids ) {
105                                 if ( !phy.getNode( external_id ).getNodeData().getBinaryCharacters()
106                                         .getGainedCharacters().contains( node_char )
107                                         && !phy.getNode( external_id ).getNodeData().getBinaryCharacters()
108                                         .getPresentCharacters().contains( node_char ) ) {
109                                     found = false;
110                                     break;
111                                 }
112                             }
113                             if ( found ) {
114                                 unique_chars.add( node_char );
115                             }
116                         }
117                     }
118                 }
119                 // System.out.println( "\tSUM:\t" + unique_chars.size() );
120                 // System.out.println( unique_chars.size() );
121                 int counter = 0;
122                 System.out.print( "\t" + unique_chars.size() );
123                 for( final String unique_char : unique_chars ) {
124                     // System.out.println( "\t" + counter + ":\t" + unique_char
125                     // );
126                     // System.out.println( "\t" + counter + ":\t" + unique_char
127                     // );
128                     System.out.print( "\t" + unique_char );
129                     ++counter;
130                 }
131                 System.out.println();
132             }
133         }
134     }
135
136     private static SortedSet<Long> copy( final SortedSet<Long> set ) {
137         final SortedSet<Long> copy = new TreeSet<Long>();
138         for( final Long i : set ) {
139             copy.add( i );
140         }
141         return copy;
142     }
143
144     private static SortedSet<Long> getAllExternalDescendantsNodeIds( final PhylogenyNode node ) {
145         final SortedSet<Long> ids = new TreeSet<Long>();
146         final List<PhylogenyNode> descs = node.getAllExternalDescendants();
147         for( final PhylogenyNode desc : descs ) {
148             ids.add( desc.getId() );
149         }
150         return ids;
151     }
152
153     private static SortedSet<String> getAllExternalPresentAndGainedCharacters( final PhylogenyNode node ) {
154         final SortedSet<String> chars = new TreeSet<String>();
155         final List<PhylogenyNode> descs = node.getAllExternalDescendants();
156         for( final PhylogenyNode desc : descs ) {
157             chars.addAll( desc.getNodeData().getBinaryCharacters().getGainedCharacters() );
158             chars.addAll( desc.getNodeData().getBinaryCharacters().getPresentCharacters() );
159         }
160         return chars;
161     }
162 }