From: cmzmasek@gmail.com Date: Wed, 20 Nov 2013 19:37:16 +0000 (+0000) Subject: new tool X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d5f9ca857c8719f6c639861d3bd6e519459cc905;p=jalview.git new tool --- diff --git a/forester/java/src/org/forester/application/subtree_feature_count.java b/forester/java/src/org/forester/application/subtree_feature_count.java new file mode 100644 index 0000000..cde4cbb --- /dev/null +++ b/forester/java/src/org/forester/application/subtree_feature_count.java @@ -0,0 +1,162 @@ + +package org.forester.application; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.SortedSet; + +import org.forester.io.parsers.phyloxml.PhyloXmlParser; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.data.Accession; +import org.forester.phylogeny.data.Sequence; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.CommandLineArguments; +import org.forester.util.ForesterUtil; + +public class subtree_feature_count { + + final static private String DEPTH_OPTION = "d"; + final static private String E_MAIL = "phylosoft@gmail.com"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String PRG_DATE = "131120"; + final static private String PRG_DESC = ""; + final static private String PRG_NAME = "subtree_feature_count"; + final static private String PRG_VERSION = "0.90"; + final static private String WWW = "sites.google.com/site/cmzmasek/home/software/forester"; + + public static void main( final String args[] ) { + try { + final CommandLineArguments cla = new CommandLineArguments( args ); + if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( cla.getNumberOfNames() != 3 ) ) { + printHelp(); + System.exit( 0 ); + } + final List allowed_options = new ArrayList(); + allowed_options.add( DEPTH_OPTION ); + final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); + if ( dissallowed_options.length() > 0 ) { + ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); + } + final double depth = cla.getOptionValueAsDouble( DEPTH_OPTION ); + final File intree_file = cla.getFile( 0 ); + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny phy = factory.create( intree_file, new PhyloXmlParser() )[ 0 ]; + execute( phy, depth ); + } + catch ( final Exception e ) { + e.printStackTrace(); + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + } + + private static StringBuilder analyzeSubtree( final PhylogenyNode n, final double depth ) { + final PhylogenyNode node = moveUp( n, depth ); + for( final PhylogenyNode ext : node.getAllExternalDescendants() ) { + ext.setIndicator( ( byte ) 1 ); + } + int xray = 0; + int nmr = 0; + int model = 0; + boolean is_first = true; + PhylogenyNode first_node = null; + PhylogenyNode last_node = null; + int c = 0; + for( final PhylogenyNode ext : node.getAllExternalDescendants() ) { + if ( is_first ) { + first_node = ext; + is_first = false; + } + last_node = ext; + ++c; + if ( ext.getNodeData().isHasSequence() ) { + final Sequence seq = ext.getNodeData().getSequence(); + final SortedSet xrefs = seq.getCrossReferences(); + if ( !ForesterUtil.isEmpty( xrefs ) ) { + for( final Accession xref : xrefs ) { + if ( xref.getSource().equalsIgnoreCase( "pdb" ) ) { + if ( xref.getComment().equalsIgnoreCase( "x-ray" ) + || xref.getComment().equalsIgnoreCase( "xray" ) ) { + ++xray; + } + if ( xref.getComment().equalsIgnoreCase( "nmr" ) ) { + ++nmr; + } + if ( xref.getComment().equalsIgnoreCase( "model" ) ) { + ++model; + } + } + } + } + } + } + final StringBuilder sb = new StringBuilder(); + sb.append( String.valueOf( c ) ); + sb.append( "\t" ); + sb.append( first_node.getName() ); + sb.append( "\t" ); + sb.append( last_node.getName() ); + sb.append( "\t" ); + sb.append( String.valueOf( xray ) ); + sb.append( "\t" ); + sb.append( String.valueOf( nmr ) ); + sb.append( "\t" ); + sb.append( String.valueOf( model ) ); + return sb; + } + + private static void execute( final Phylogeny phy, final double depth ) { + setAllIndicatorsToZero( phy ); + for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode n = it.next(); + if ( n.getIndicator() != 0 ) { + continue; + } + final StringBuilder s = analyzeSubtree( n, depth ); + System.out.println( s.toString() ); + } + } + + private static PhylogenyNode moveUp( final PhylogenyNode node, final double depth ) { + PhylogenyNode n = node; + double current_depth = 0.0; + while ( current_depth < depth ) { + current_depth += n.getDistanceToParent(); + if ( n.getParent() == null ) { + throw new IllegalArgumentException( "Depth " + depth + " is too large" ); + } + n = n.getParent(); + } + return n; + } + + private static void printHelp() { + ForesterUtil.printProgramInformation( PRG_NAME, + PRG_DESC, + PRG_VERSION, + PRG_DATE, + E_MAIL, + WWW, + ForesterUtil.getForesterLibraryInformation() ); + System.out.println( "Usage:" ); + System.out.println(); + System.out.println( PRG_NAME + "" ); + System.out.println(); + System.out.println( " exmple: " ); + System.out.println(); + System.out + .println( "dom_dup \"HUMAN~[12]-2\" groups.txt RRMa_ALL_plus_RRMa_ee3_50_hmmalign_05_40_fme_gsdi.phylo.xml" ); + System.out.println(); + System.out.println(); + } + + private static void setAllIndicatorsToZero( final Phylogeny phy ) { + for( final PhylogenyNodeIterator it = phy.iteratorPostorder(); it.hasNext(); ) { + it.next().setIndicator( ( byte ) 0 ); + } + } +}