rio - gsdir work...
author cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 26 Dec 2012 06:09:43 +0000 (06:09 +0000)
committer cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 26 Dec 2012 06:09:43 +0000 (06:09 +0000)
forester/java/src/org/forester/application/rio.java
forester/java/src/org/forester/archaeopteryx/Constants.java
forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java
forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
forester/java/src/org/forester/phylogeny/data/Taxonomy.java
forester/java/src/org/forester/rio/RIO.java
forester/java/src/org/forester/util/ForesterConstants.java

index 98fd473..1e8745f 100644 (file)
@@ -46,8 +46,8 @@ import org.forester.util.ForesterUtil;
 public class rio {
 
     final static private String PRG_NAME      = "rio";
-    final static private String PRG_VERSION   = "4.000 beta 3";
-    final static private String PRG_DATE      = "2012.12.19";
+    final static private String PRG_VERSION   = "4.000 beta 4";
+    final static private String PRG_DATE      = "2012.12.25";
     final static private String E_MAIL        = "czmasek@burnham.org";
     final static private String WWW           = "www.phylosoft.org/forester/";
     final static private String HELP_OPTION_1 = "help";
index 7a300ff..5558a84 100644 (file)
@@ -42,8 +42,8 @@ public final class Constants {
     public final static boolean __SYNTH_LF                                                    = false;                                                    // TODO remove me
     public final static boolean ALLOW_DDBJ_BLAST                                              = false;
     public final static String  PRG_NAME                                                      = "Archaeopteryx";
-    final static String         VERSION                                                       = "0.977";
-    final static String         PRG_DATE                                                      = "121210";
+    final static String         VERSION                                                       = "0.978";
+    final static String         PRG_DATE                                                      = "121225";
     final static String         DEFAULT_CONFIGURATION_FILE_NAME                               = "_aptx_configuration_file";
     final static String[]       DEFAULT_FONT_CHOICES                                          = { "Verdana", "Tahoma",
             "Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" };
index 5ba492e..86ff0d6 100644 (file)
@@ -170,7 +170,7 @@ public final class WebserviceUtil {
             final PhylogenyNode n = it.next();
             if ( n.isExternal() && n.getNodeData().isHasTaxonomy() ) {
                 final String name = n.getNodeData().getTaxonomy().getScientificName();
-                if ( !ForesterUtil.isEmpty( name ) && PhyloXmlUtil.TAXOMONY_CODE_PATTERN_STRICT.matcher( name ).matches() ) {
+                if ( !ForesterUtil.isEmpty( name ) && PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( name ).matches() ) {
                     n.getNodeData().getTaxonomy().setScientificName( "" );
                     n.getNodeData().getTaxonomy().setTaxonomyCode( name );
                 }
index 0adb7fa..dfc92a3 100644 (file)
@@ -38,8 +38,7 @@ public final class PhyloXmlUtil {
     public static final String       OTHER                                      = "other";
     public static final String       UNKNOWN                                    = "unknown";
     public final static Pattern      SEQUENCE_SYMBOL_PATTERN                    = Pattern.compile( "\\S{1,20}" );
-    public final static Pattern      TAXOMONY_CODE_PATTERN_STRICT               = ParserUtils.TAXOMONY_CODE_PATTERN_1;
-    public final static Pattern      TAXOMONY_CODE_PATTERN_LAX                  = Pattern.compile( "[A-Z0-9]{3,6}" );
+    public final static Pattern      TAXOMONY_CODE_PATTERN               = ParserUtils.TAXOMONY_CODE_PATTERN_1;
     public final static Pattern      LIT_REF_DOI_PATTERN                        = Pattern
                                                                                         .compile( "[a-zA-Z0-9_\\.]+\\S+" );
     public final static Set<String>  SEQUENCE_TYPES                             = new HashSet<String>();
index b2b57d4..f2a982d 100644 (file)
@@ -408,7 +408,7 @@ public class PhylogenyMethods {
         final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
         for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
             final PhylogenyNode n = iter.next();
-            if ( ( !n.isExternal() )  && ( n.getNumberOfDescendants() == 1 ) ) {
+            if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
                 to_delete.add( n );
             }
         }
@@ -1394,13 +1394,16 @@ public class PhylogenyMethods {
             if ( !n.getNodeData().isHasTaxonomy() ) {
                 throw new IllegalArgumentException( "no taxonomic data in node: " + n );
             }
-            //  ref_ext_taxo.add( getSpecies( n ) );
             if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
                 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
             }
             if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
                 ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
             }
+            if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
+                    && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
+                ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
+            }
         }
         final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
         for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
@@ -1409,7 +1412,9 @@ public class PhylogenyMethods {
                 nodes_to_delete.add( n );
             }
             else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
-                    && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+                    && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+                    && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n
+                            .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
                 nodes_to_delete.add( n );
             }
         }
index 4f6d146..2fe0497 100644 (file)
@@ -329,18 +329,12 @@ public class Taxonomy implements PhylogenyData, MultipleUris, Comparable<Taxonom
     }
 
     public void setTaxonomyCode( final String taxonomy_code ) throws PhyloXmlDataFormatException {
-        if ( ForesterConstants.TAXONOMY_CODE_STRICT ) {
+      
             if ( !ForesterUtil.isEmpty( taxonomy_code )
-                    && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN_STRICT.matcher( taxonomy_code ).matches() ) {
+                    && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( taxonomy_code ).matches() ) {
                 throw new PhyloXmlDataFormatException( "illegal taxonomy code: [" + taxonomy_code + "]" );
             }
-        }
-        else {
-            if ( !ForesterUtil.isEmpty( taxonomy_code )
-                    && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN_LAX.matcher( taxonomy_code ).matches() ) {
-                throw new PhyloXmlDataFormatException( "illegal taxonomy code: [" + taxonomy_code + "]" );
-            }
-        }
+       
         _taxonomy_code = taxonomy_code;
     }
 
index 24b4c2a..d844d2b 100644 (file)
@@ -161,7 +161,7 @@ public final class RIO {
         }
         final Phylogeny[] my_gene_trees;
         if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) {
-            my_gene_trees = new Phylogeny[ 1 + last - first ];
+            my_gene_trees = new Phylogeny[ ( 1 + last ) - first ];
             int c = 0;
             for( int i = first; i <= last; ++i ) {
                 my_gene_trees[ c++ ] = gene_trees[ i ];
@@ -346,16 +346,17 @@ public final class RIO {
         log( "Gene trees analyzed                             : " + _duplications_stats.getN() );
         log( "Mean number of duplications                     : " + df.format( _duplications_stats.arithmeticMean() )
                 + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " ("
-                + df.format( 100.0 * _duplications_stats.arithmeticMean() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+                + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() )
+                + "%)" );
         if ( _duplications_stats.getN() > 3 ) {
             log( "Median number of duplications                   : " + df.format( _duplications_stats.median() )
-                    + " (" + df.format( 100.0 * _duplications_stats.median() / getIntNodesOfAnalyzedGeneTrees() )
+                    + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() )
                     + "%)" );
         }
         log( "Minimum duplications                            : " + ( int ) _duplications_stats.getMin() + " ("
-                + df.format( 100.0 * _duplications_stats.getMin() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+                + df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
         log( "Maximum duplications                            : " + ( int ) _duplications_stats.getMax() + " ("
-                + df.format( 100.0 * _duplications_stats.getMax() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+                + df.format( ( 100.0 * _duplications_stats.getMax() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
         log( "Gene tree internal nodes                        : " + getIntNodesOfAnalyzedGeneTrees() );
         log( "Gene tree external nodes                        : " + getExtNodesOfAnalyzedGeneTrees() );
     }
index f030c91..0910285 100644 (file)
@@ -27,8 +27,8 @@ package org.forester.util;
 
 public final class ForesterConstants {
 
-    public final static String  FORESTER_VERSION            = "1.012";
-    public final static String  FORESTER_DATE               = "121219";
+    public final static String  FORESTER_VERSION            = "1.013";
+    public final static String  FORESTER_DATE               = "121225";
     public final static String  PHYLO_XML_VERSION           = "1.10";
     public final static String  PHYLO_XML_LOCATION          = "http://www.phyloxml.org";
     public final static String  PHYLO_XML_XSD               = "phyloxml.xsd";
@@ -38,7 +38,6 @@ public final class ForesterConstants {
     public final static String  UTF8                        = "UTF-8";
     public final static String  PHYLO_XML_REFERENCE         = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356";
     public final static boolean RELEASE                     = false;
-    public final static boolean TAXONOMY_CODE_STRICT        = true;
     
     
     public enum PhylogeneticTreeFormats {