new tool
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 21 Nov 2013 03:09:43 +0000 (03:09 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 21 Nov 2013 03:09:43 +0000 (03:09 +0000)
forester_applications/src/org/forester/applications/map_lengths.java
forester_applications/src/org/forester/applications/simple_node_processor.java
forester_applications/src/org/forester/applications/subtree_feature_count.java [new file with mode: 0644]
forester_applications/src/org/forester/applications/tax_code_cleaner.java

index b4475df..c4f3983 100644 (file)
@@ -58,7 +58,7 @@ public class map_lengths {
         }
         try {
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
             final Phylogeny[] phylogenies_0 = factory.create( cla.getFile( 0 ), xml_parser );
             final Phylogeny phy = phylogenies_0[ 0 ];
             for( int i = 1; i < cla.getNumberOfNames(); i++ ) {
index c3d7d28..9373239 100644 (file)
@@ -40,7 +40,6 @@ import org.forester.phylogeny.data.Taxonomy;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
-import org.forester.surfacing.SurfacingUtil;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.ForesterUtil;
 
@@ -53,14 +52,14 @@ public class simple_node_processor {
             CommandLineArguments cla = null;
             cla = new CommandLineArguments( args );
             in = cla.getFile( 0 );
-         //   in = new File( "");
+            //   in = new File( "");
             //out = cla.getFile( 1 );
             // if ( out.exists() ) {
             //      System.out.println( out + " already exists" );
             //      System.exit( -1 );
             //  }
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
             final Phylogeny[] phylogenies_0 = factory.create( in, xml_parser );
             final Phylogeny phylogeny_0 = phylogenies_0[ 0 ];
             final PhylogenyNodeIterator it = phylogeny_0.iteratorPostorder();
@@ -122,11 +121,11 @@ public class simple_node_processor {
             if ( node.getNodeData().isHasTaxonomy() ) {
                 final Taxonomy t = node.getNodeData().getTaxonomy();
                 if ( !ForesterUtil.isEmpty( t.getTaxonomyCode() ) ) {
-                    final String c =  t.getTaxonomyCode();
+                    final String c = t.getTaxonomyCode();
                     if ( c.indexOf( "XX" ) == 3 ) {
-                         System.out.println( "FAKE_CODE_TO_ID_MAP.put( \"" + c + "\", "+  t.getIdentifier().getValue()   + ");" );
+                        System.out.println( "FAKE_CODE_TO_ID_MAP.put( \"" + c + "\", " + t.getIdentifier().getValue()
+                                + ");" );
                     }
-                    
                     //   SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( t.getTaxonomyCode(), phy );
                 }
             }
diff --git a/forester_applications/src/org/forester/applications/subtree_feature_count.java b/forester_applications/src/org/forester/applications/subtree_feature_count.java
new file mode 100644 (file)
index 0000000..299e8d8
--- /dev/null
@@ -0,0 +1,150 @@
+// javac -cp ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
+// ~/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/org/forester/applications/subtree_feature_count.java
+// java -Xmx2048m -cp
+// /home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester_applications/src/:/home/czmasek/SOFTWARE_DEV/ECLIPSE_WORKSPACE/forester/java/forester.jar
+// org.forester.applications.subtree_feature_count
+
+package org.forester.applications;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedSet;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Accession;
+import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.util.CommandLineArguments;
+import org.forester.util.ForesterUtil;
+
+/**
+ * Command-line tool that divides a phyloXML tree into subtrees and reports,
+ * for each subtree, how many PDB cross-references are annotated as x-ray,
+ * NMR, or model structures.
+ * <p>
+ * Usage: {@code subtree_feature_count -d=<min distance to root> <intree>}
+ */
+public class subtree_feature_count {
+
+    final static private String MIN_DISTANCE_TO_ROOT_OPTION = "d";
+    final static private String E_MAIL                      = "phylosoft@gmail.com";
+    final static private String HELP_OPTION_1               = "help";
+    final static private String HELP_OPTION_2               = "h";
+    final static private String PRG_DATE                    = "131120";
+    final static private String PRG_DESC                    = "";
+    final static private String PRG_NAME                    = "subtree_feature_count";
+    final static private String PRG_VERSION                 = "0.90";
+    final static private String WWW                         = "sites.google.com/site/cmzmasek/home/software/forester";
+
+    /**
+     * Parses the command line, reads the first phylogeny of the input file, and
+     * prints the per-subtree counts to standard output.
+     *
+     * @param args the "-d" option (a positive minimum distance to root) and the
+     *             input tree file
+     */
+    public static void main( final String[] args ) {
+        try {
+            final CommandLineArguments cla = new CommandLineArguments( args );
+            if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( args.length < 2 ) ) {
+                printHelp();
+                System.exit( 0 );
+            }
+            final List<String> allowed_options = new ArrayList<String>();
+            allowed_options.add( MIN_DISTANCE_TO_ROOT_OPTION );
+            final String disallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
+            if ( disallowed_options.length() > 0 ) {
+                ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + disallowed_options );
+            }
+            // Report a missing option explicitly rather than relying on the value parser.
+            if ( !cla.isOptionSet( MIN_DISTANCE_TO_ROOT_OPTION ) ) {
+                ForesterUtil.fatalError( PRG_NAME, "no value given for option: -" + MIN_DISTANCE_TO_ROOT_OPTION );
+            }
+            final double min_distance_to_root = cla.getOptionValueAsDouble( MIN_DISTANCE_TO_ROOT_OPTION );
+            if ( min_distance_to_root <= 0 ) {
+                ForesterUtil.fatalError( PRG_NAME, "attempt to use min distance to root of: " + min_distance_to_root );
+            }
+            final File intree_file = cla.getFile( 0 );
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny[] phylogenies = factory.create( intree_file,
+                                                            PhyloXmlParser.createPhyloXmlParserXsdValidating() );
+            // Guard the index access below so an empty input yields a readable error.
+            if ( ( phylogenies == null ) || ( phylogenies.length < 1 ) ) {
+                ForesterUtil.fatalError( PRG_NAME, "no phylogeny found in: " + intree_file );
+            }
+            execute( phylogenies[ 0 ], min_distance_to_root );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        }
+    }
+
+    /**
+     * Divides the phylogeny into subtrees and prints one tab-separated line per
+     * subtree: size of the subtree's node list, then the x-ray, NMR, and model
+     * counts of its PDB cross-references.
+     *
+     * @param phy                  the phylogeny to analyze
+     * @param min_distance_to_root passed to PhylogenyMethods.divideIntoSubTrees
+     */
+    private static void execute( final Phylogeny phy, final double min_distance_to_root ) {
+        final List<List<PhylogenyNode>> subtrees = PhylogenyMethods.divideIntoSubTrees( phy, min_distance_to_root );
+        for( final List<PhylogenyNode> subtree : subtrees ) {
+            int xray = 0;
+            int nmr = 0;
+            int model = 0;
+            for( final PhylogenyNode node : subtree ) {
+                if ( !node.getNodeData().isHasSequence() ) {
+                    continue;
+                }
+                final Sequence seq = node.getNodeData().getSequence();
+                final SortedSet<Accession> xrefs = seq.getCrossReferences();
+                if ( ForesterUtil.isEmpty( xrefs ) ) {
+                    continue;
+                }
+                for( final Accession xref : xrefs ) {
+                    // Literal-first comparisons cannot throw if source or comment is null.
+                    if ( !"pdb".equalsIgnoreCase( xref.getSource() ) ) {
+                        continue;
+                    }
+                    final String comment = xref.getComment();
+                    if ( "x-ray".equalsIgnoreCase( comment ) || "xray".equalsIgnoreCase( comment ) ) {
+                        ++xray;
+                    }
+                    else if ( "nmr".equalsIgnoreCase( comment ) ) {
+                        ++nmr;
+                    }
+                    else if ( "model".equalsIgnoreCase( comment ) ) {
+                        ++model;
+                    }
+                }
+            }
+            System.out.println( subtree.size() + "\t" + xray + "\t" + nmr + "\t" + model );
+        }
+    }
+
+    /**
+     * Prints program information and the usage line to standard output.
+     */
+    private static void printHelp() {
+        ForesterUtil.printProgramInformation( PRG_NAME,
+                                              PRG_DESC,
+                                              PRG_VERSION,
+                                              PRG_DATE,
+                                              E_MAIL,
+                                              WWW,
+                                              ForesterUtil.getForesterLibraryInformation() );
+        System.out.println( "Usage:" );
+        System.out.println();
+        // The leading space keeps the program name and the option apart.
+        System.out.println( PRG_NAME + " -d=<min distance to root> <intree>" );
+        System.out.println();
+        System.out.println();
+    }
+}
index 0bc4616..cf46fe3 100644 (file)
@@ -38,7 +38,7 @@ public class tax_code_cleaner {
             //      System.exit( -1 );
             //  }
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhyloXmlParser xml_parser = new PhyloXmlParser();
+            final PhyloXmlParser xml_parser = PhyloXmlParser.createPhyloXmlParserXsdValidating();
             final Phylogeny[] phylogenies_0 = factory.create( in, xml_parser );
             final Phylogeny phylogeny_0 = phylogenies_0[ 0 ];
             final PhylogenyNodeIterator it = phylogeny_0.iteratorPostorder();