new tool
[jalview.git] / forester / java / src / org / forester / application / subtree_feature_count.java
1
2 package org.forester.application;
3
4 import java.io.File;
5 import java.util.ArrayList;
6 import java.util.List;
7 import java.util.SortedSet;
8
9 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
10 import org.forester.phylogeny.Phylogeny;
11 import org.forester.phylogeny.PhylogenyNode;
12 import org.forester.phylogeny.data.Accession;
13 import org.forester.phylogeny.data.Sequence;
14 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
15 import org.forester.phylogeny.factories.PhylogenyFactory;
16 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
17 import org.forester.util.CommandLineArguments;
18 import org.forester.util.ForesterUtil;
19
20 public class subtree_feature_count {
21
22     final static private String DEPTH_OPTION  = "d";
23     final static private String E_MAIL        = "phylosoft@gmail.com";
24     final static private String HELP_OPTION_1 = "help";
25     final static private String HELP_OPTION_2 = "h";
26     final static private String PRG_DATE      = "131120";
27     final static private String PRG_DESC      = "";
28     final static private String PRG_NAME      = "subtree_feature_count";
29     final static private String PRG_VERSION   = "0.90";
30     final static private String WWW           = "sites.google.com/site/cmzmasek/home/software/forester";
31
32     public static void main( final String args[] ) {
33         try {
34             final CommandLineArguments cla = new CommandLineArguments( args );
35             if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( cla.getNumberOfNames() != 3 ) ) {
36                 printHelp();
37                 System.exit( 0 );
38             }
39             final List<String> allowed_options = new ArrayList<String>();
40             allowed_options.add( DEPTH_OPTION );
41             final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
42             if ( dissallowed_options.length() > 0 ) {
43                 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
44             }
45             final double depth = cla.getOptionValueAsDouble( DEPTH_OPTION );
46             final File intree_file = cla.getFile( 0 );
47             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
48             final Phylogeny phy = factory.create( intree_file, new PhyloXmlParser() )[ 0 ];
49             execute( phy, depth );
50         }
51         catch ( final Exception e ) {
52             e.printStackTrace();
53             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
54         }
55     }
56
57     private static StringBuilder analyzeSubtree( final PhylogenyNode n, final double depth ) {
58         final PhylogenyNode node = moveUp( n, depth );
59         for( final PhylogenyNode ext : node.getAllExternalDescendants() ) {
60             ext.setIndicator( ( byte ) 1 );
61         }
62         int xray = 0;
63         int nmr = 0;
64         int model = 0;
65         boolean is_first = true;
66         PhylogenyNode first_node = null;
67         PhylogenyNode last_node = null;
68         int c = 0;
69         for( final PhylogenyNode ext : node.getAllExternalDescendants() ) {
70             if ( is_first ) {
71                 first_node = ext;
72                 is_first = false;
73             }
74             last_node = ext;
75             ++c;
76             if ( ext.getNodeData().isHasSequence() ) {
77                 final Sequence seq = ext.getNodeData().getSequence();
78                 final SortedSet<Accession> xrefs = seq.getCrossReferences();
79                 if ( !ForesterUtil.isEmpty( xrefs ) ) {
80                     for( final Accession xref : xrefs ) {
81                         if ( xref.getSource().equalsIgnoreCase( "pdb" ) ) {
82                             if ( xref.getComment().equalsIgnoreCase( "x-ray" )
83                                     || xref.getComment().equalsIgnoreCase( "xray" ) ) {
84                                 ++xray;
85                             }
86                             if ( xref.getComment().equalsIgnoreCase( "nmr" ) ) {
87                                 ++nmr;
88                             }
89                             if ( xref.getComment().equalsIgnoreCase( "model" ) ) {
90                                 ++model;
91                             }
92                         }
93                     }
94                 }
95             }
96         }
97         final StringBuilder sb = new StringBuilder();
98         sb.append( String.valueOf( c ) );
99         sb.append( "\t" );
100         sb.append( first_node.getName() );
101         sb.append( "\t" );
102         sb.append( last_node.getName() );
103         sb.append( "\t" );
104         sb.append( String.valueOf( xray ) );
105         sb.append( "\t" );
106         sb.append( String.valueOf( nmr ) );
107         sb.append( "\t" );
108         sb.append( String.valueOf( model ) );
109         return sb;
110     }
111
112     private static void execute( final Phylogeny phy, final double depth ) {
113         setAllIndicatorsToZero( phy );
114         for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
115             final PhylogenyNode n = it.next();
116             if ( n.getIndicator() != 0 ) {
117                 continue;
118             }
119             final StringBuilder s = analyzeSubtree( n, depth );
120             System.out.println( s.toString() );
121         }
122     }
123
124     private static PhylogenyNode moveUp( final PhylogenyNode node, final double depth ) {
125         PhylogenyNode n = node;
126         double current_depth = 0.0;
127         while ( current_depth < depth ) {
128             current_depth += n.getDistanceToParent();
129             if ( n.getParent() == null ) {
130                 throw new IllegalArgumentException( "Depth " + depth + " is too large" );
131             }
132             n = n.getParent();
133         }
134         return n;
135     }
136
137     private static void printHelp() {
138         ForesterUtil.printProgramInformation( PRG_NAME,
139                                               PRG_DESC,
140                                               PRG_VERSION,
141                                               PRG_DATE,
142                                               E_MAIL,
143                                               WWW,
144                                               ForesterUtil.getForesterLibraryInformation() );
145         System.out.println( "Usage:" );
146         System.out.println();
147         System.out.println( PRG_NAME + "" );
148         System.out.println();
149         System.out.println( " exmple: " );
150         System.out.println();
151         System.out
152                 .println( "dom_dup \"HUMAN~[12]-2\" groups.txt RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_gsdi.phylo.xml" );
153         System.out.println();
154         System.out.println();
155     }
156
157     private static void setAllIndicatorsToZero( final Phylogeny phy ) {
158         for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) {
159             it.next().setIndicator( ( byte ) 0 );
160         }
161     }
162 }