From 00572c6ebfb23581c439e7d9e70337bf877d2840 Mon Sep 17 00:00:00 2001
From: "cmzmasek@gmail.com"
Date: Thu, 21 Nov 2013 03:09:43 +0000
Subject: [PATCH] new tool

---
 .../src/org/forester/applications/map_lengths.java |    3 +-
 .../applications/simple_node_processor.java        |   11 +-
 .../applications/subtree_feature_count.java        |  142 ++++++++++++++++++++
 .../forester/applications/tax_code_cleaner.java    |    2 +-
 4 files changed, 150 insertions(+), 8 deletions(-)
 create mode 100644 forester_applications/src/org/forester/applications/subtree_feature_count.java

diff --git a/forester_applications/src/org/forester/applications/map_lengths.java b/forester_applications/src/org/forester/applications/map_lengths.java
index b4475df..c4f3983 100644
--- a/forester_applications/src/org/forester/applications/map_lengths.java
+++ b/forester_applications/src/org/forester/applications/map_lengths.java
@@ -58,7 +58,8 @@ public class map_lengths {
         }
         try {
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
+            ;
             final Phylogeny[] phylogenies_0 = factory.create( cla.getFile( 0 ), xml_parser );
             final Phylogeny phy = phylogenies_0[ 0 ];
             for( int i = 1; i < cla.getNumberOfNames(); i++ ) {
diff --git a/forester_applications/src/org/forester/applications/simple_node_processor.java b/forester_applications/src/org/forester/applications/simple_node_processor.java
index c3d7d28..9373239 100644
--- a/forester_applications/src/org/forester/applications/simple_node_processor.java
+++ b/forester_applications/src/org/forester/applications/simple_node_processor.java
@@ -40,7 +40,6 @@ import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
-import org.forester.surfacing.SurfacingUtil;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.ForesterUtil;
 
@@ -53,14 +52,14 @@ public class simple_node_processor {
             CommandLineArguments cla = null;
             cla = new CommandLineArguments( args );
             in = cla.getFile( 0 );
-            // in = new File( "");
+            // in = new File( "");
             //out = cla.getFile( 1 );
             // if ( out.exists() ) {
             // System.out.println( out + " already exists" );
             // System.exit( -1 );
             // }
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
             final Phylogeny[] phylogenies_0 = factory.create( in, xml_parser );
             final Phylogeny phylogeny_0 = phylogenies_0[ 0 ];
             final PhylogenyNodeIterator it = phylogeny_0.iteratorPostorder();
@@ -122,11 +121,11 @@ public class simple_node_processor {
         if ( node.getNodeData().isHasTaxonomy() ) {
             final Taxonomy t = node.getNodeData().getTaxonomy();
             if ( !ForesterUtil.isEmpty( t.getTaxonomyCode() ) ) {
-                final String c = t.getTaxonomyCode();
+                final String c = t.getTaxonomyCode();
                 if ( c.indexOf( "XX" ) == 3 ) {
-                    System.out.println( "FAKE_CODE_TO_ID_MAP.put( \"" + c + "\", "+ t.getIdentifier().getValue() + ");" );
+                    System.out.println( "FAKE_CODE_TO_ID_MAP.put( \"" + c + "\", " + t.getIdentifier().getValue()
+                            + ");" );
                 }
-                // SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( t.getTaxonomyCode(), phy );
             }
         }
 
diff --git a/forester_applications/src/org/forester/applications/subtree_feature_count.java b/forester_applications/src/org/forester/applications/subtree_feature_count.java
new file mode 100644
index 0000000..299e8d8
--- /dev/null
+++ b/forester_applications/src/org/forester/applications/subtree_feature_count.java
@@ -0,0 +1,142 @@
+// javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
+// ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/subtree_feature_count.java
+// java -Xmx2048m -cp
+// /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
+// org.forester.applications.subtree_feature_count
+
+package org.forester.applications;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedSet;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.util.CommandLineArguments;
+import org.forester.util.ForesterUtil;
+
+/**
+ * Command line tool that divides a phylogeny into subtrees (via
+ * PhylogenyMethods.divideIntoSubTrees) and prints one tab-separated line per
+ * subtree: the size of the subtree's node list, followed by the numbers of
+ * "pdb" sequence cross-references commented as x-ray, nmr, and model.
+ */
+public class subtree_feature_count {
+
+    final static private String MIN_DISTANCE_TO_ROOT_OPTION = "d";
+    final static private String E_MAIL                      = "phylosoft@gmail.com";
+    final static private String HELP_OPTION_1               = "help";
+    final static private String HELP_OPTION_2               = "h";
+    final static private String PRG_DATE                    = "131120";
+    final static private String PRG_DESC                    = "";
+    final static private String PRG_NAME                    = "subtree_feature_count";
+    final static private String PRG_VERSION                 = "0.90";
+    final static private String WWW                         = "sites.google.com/site/cmzmasek/home/software/forester";
+
+    /**
+     * Parses the command line, reads the first phylogeny from the input file,
+     * and prints the per-subtree counts to standard output.
+     *
+     * @param args command line options followed by the input tree file
+     */
+    public static void main( final String args[] ) {
+        try {
+            final CommandLineArguments cla = new CommandLineArguments( args );
+            if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length < 2 ) ) {
+                printHelp();
+                System.exit( 0 );
+            }
+            final List<String> allowed_options = new ArrayList<String>();
+            allowed_options.add( MIN_DISTANCE_TO_ROOT_OPTION );
+            final String disallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
+            if ( disallowed_options.length() > 0 ) {
+                ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + disallowed_options );
+            }
+            final double min_distance_to_root = cla.getOptionValueAsDouble( MIN_DISTANCE_TO_ROOT_OPTION );
+            if ( min_distance_to_root <= 0 ) {
+                ForesterUtil.fatalError( PRG_NAME, "attempt to use min distance to root of: " + min_distance_to_root );
+            }
+            final File intree_file = cla.getFile( 0 );
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny[] phys = factory.create( intree_file,
+                                                     PhyloXmlParser.createPhyloXmlParserXsdValidating() );
+            // Report an input without any phylogeny explicitly rather than failing on index 0.
+            if ( ( phys == null ) || ( phys.length < 1 ) ) {
+                ForesterUtil.fatalError( PRG_NAME, "no phylogeny found in [" + intree_file + "]" );
+            }
+            execute( phys[ 0 ], min_distance_to_root );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        }
+    }
+
+    /**
+     * Prints one tab-separated line per subtree: node list size, x-ray count,
+     * nmr count, and model count.
+     *
+     * @param phy the phylogeny to divide into subtrees
+     * @param min_distance_to_root passed on to PhylogenyMethods.divideIntoSubTrees
+     */
+    private static void execute( final Phylogeny phy, final double min_distance_to_root ) {
+        final List<List<PhylogenyNode>> ll = PhylogenyMethods.divideIntoSubTrees( phy, min_distance_to_root );
+        for( final List<PhylogenyNode> l : ll ) {
+            int xray = 0;
+            int nmr = 0;
+            int model = 0;
+            for( final PhylogenyNode node : l ) {
+                if ( !node.getNodeData().isHasSequence() ) {
+                    continue;
+                }
+                final Sequence seq = node.getNodeData().getSequence();
+                final SortedSet<Accession> xrefs = seq.getCrossReferences();
+                if ( ForesterUtil.isEmpty( xrefs ) ) {
+                    continue;
+                }
+                for( final Accession xref : xrefs ) {
+                    // Literal-first comparisons stay safe if source or comment is null.
+                    if ( !"pdb".equalsIgnoreCase( xref.getSource() ) ) {
+                        continue;
+                    }
+                    final String comment = xref.getComment();
+                    if ( "x-ray".equalsIgnoreCase( comment ) || "xray".equalsIgnoreCase( comment ) ) {
+                        ++xray;
+                    }
+                    else if ( "nmr".equalsIgnoreCase( comment ) ) {
+                        ++nmr;
+                    }
+                    else if ( "model".equalsIgnoreCase( comment ) ) {
+                        ++model;
+                    }
+                }
+            }
+            System.out.println( l.size() + "\t" + xray + "\t" + nmr + "\t" + model );
+        }
+    }
+
+    /**
+     * Prints program information and a usage line to standard output.
+     */
+    private static void printHelp() {
+        ForesterUtil.printProgramInformation( PRG_NAME,
+                                              PRG_DESC,
+                                              PRG_VERSION,
+                                              PRG_DATE,
+                                              E_MAIL,
+                                              WWW,
+                                              ForesterUtil.getForesterLibraryInformation() );
+        System.out.println( "Usage:" );
+        System.out.println();
+        System.out.println( PRG_NAME + " -d=<min distance to root> <intree file>" );
+        System.out.println();
+        System.out.println();
+    }
+}
diff --git a/forester_applications/src/org/forester/applications/tax_code_cleaner.java b/forester_applications/src/org/forester/applications/tax_code_cleaner.java
index 0bc4616..cf46fe3 100644
--- a/forester_applications/src/org/forester/applications/tax_code_cleaner.java
+++ b/forester_applications/src/org/forester/applications/tax_code_cleaner.java
@@ -38,7 +38,7 @@ public class tax_code_cleaner {
             // System.exit( -1 );
             // }
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
             final Phylogeny[] phylogenies_0 = factory.create( in, xml_parser );
             final Phylogeny phylogeny_0 = phylogenies_0[ 0 ];
             final PhylogenyNodeIterator it = phylogeny_0.iteratorPostorder();
-- 
1.7.10.2