From 739ed5a4a7e77a52006e1d840983c2afffb348fd Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Wed, 26 Dec 2012 06:09:43 +0000 Subject: [PATCH] rio - gsdir work... --- forester/java/src/org/forester/application/rio.java | 4 ++-- .../java/src/org/forester/archaeopteryx/Constants.java | 4 ++-- .../forester/archaeopteryx/webservices/WebserviceUtil.java | 2 +- .../src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java | 3 +-- .../java/src/org/forester/phylogeny/PhylogenyMethods.java | 11 ++++++++--- .../java/src/org/forester/phylogeny/data/Taxonomy.java | 12 +++--------- forester/java/src/org/forester/rio/RIO.java | 11 ++++++----- forester/java/src/org/forester/util/ForesterConstants.java | 5 ++--- 8 files changed, 25 insertions(+), 27 deletions(-) diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 98fd473..1e8745f 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -46,8 +46,8 @@ import org.forester.util.ForesterUtil; public class rio { final static private String PRG_NAME = "rio"; - final static private String PRG_VERSION = "4.000 beta 3"; - final static private String PRG_DATE = "2012.12.19"; + final static private String PRG_VERSION = "4.000 beta 4"; + final static private String PRG_DATE = "2012.12.25"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "www.phylosoft.org/forester/"; final static private String HELP_OPTION_1 = "help"; diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java index 7a300ff..5558a84 100644 --- a/forester/java/src/org/forester/archaeopteryx/Constants.java +++ b/forester/java/src/org/forester/archaeopteryx/Constants.java @@ -42,8 +42,8 @@ public final class Constants { public final static boolean __SYNTH_LF = false; // TODO remove me public final static boolean ALLOW_DDBJ_BLAST = false; public final static String PRG_NAME = "Archaeopteryx"; - final static String VERSION = "0.977"; - final static String PRG_DATE = "121210"; + final static String VERSION = "0.978"; + final static String PRG_DATE = "121225"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Verdana", "Tahoma", "Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" }; diff --git a/forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java b/forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java index 5ba492e..86ff0d6 100644 --- a/forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java @@ -170,7 +170,7 @@ public final class WebserviceUtil { final PhylogenyNode n = it.next(); if ( n.isExternal() && n.getNodeData().isHasTaxonomy() ) { final String name = n.getNodeData().getTaxonomy().getScientificName(); - if ( !ForesterUtil.isEmpty( name ) && PhyloXmlUtil.TAXOMONY_CODE_PATTERN_STRICT.matcher( name ).matches() ) { + if ( !ForesterUtil.isEmpty( name ) && PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( name ).matches() ) { n.getNodeData().getTaxonomy().setScientificName( "" ); n.getNodeData().getTaxonomy().setTaxonomyCode( name ); } diff --git a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java index 0adb7fa..dfc92a3 100644 --- a/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java +++ b/forester/java/src/org/forester/io/parsers/phyloxml/PhyloXmlUtil.java @@ -38,8 +38,7 @@ public final class PhyloXmlUtil { public static final String OTHER = "other"; public static final String UNKNOWN = "unknown"; public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,20}" ); - public final static Pattern TAXOMONY_CODE_PATTERN_STRICT = ParserUtils.TAXOMONY_CODE_PATTERN_1; - public final static Pattern TAXOMONY_CODE_PATTERN_LAX = Pattern.compile( "[A-Z0-9]{3,6}" ); + public final static Pattern TAXOMONY_CODE_PATTERN = ParserUtils.TAXOMONY_CODE_PATTERN_1; public final static Pattern LIT_REF_DOI_PATTERN = Pattern .compile( "[a-zA-Z0-9_\\.]+\\S+" ); public final static Set SEQUENCE_TYPES = new HashSet(); diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index b2b57d4..f2a982d 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -408,7 +408,7 @@ public class PhylogenyMethods { final ArrayList to_delete = new ArrayList(); for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode n = iter.next(); - if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) { + if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) { to_delete.add( n ); } } @@ -1394,13 +1394,16 @@ public class PhylogenyMethods { if ( !n.getNodeData().isHasTaxonomy() ) { throw new IllegalArgumentException( "no taxonomic data in node: " + n ); } - // ref_ext_taxo.add( getSpecies( n ) ); if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() ); } if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() ); } + if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) { + ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ); + } } final ArrayList nodes_to_delete = new ArrayList(); for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) { @@ -1409,7 +1412,9 @@ public class PhylogenyMethods { nodes_to_delete.add( n ); } else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) ) - && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) { + && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) + && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n + .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) { nodes_to_delete.add( n ); } } diff --git a/forester/java/src/org/forester/phylogeny/data/Taxonomy.java b/forester/java/src/org/forester/phylogeny/data/Taxonomy.java index 4f6d146..2fe0497 100644 --- a/forester/java/src/org/forester/phylogeny/data/Taxonomy.java +++ b/forester/java/src/org/forester/phylogeny/data/Taxonomy.java @@ -329,18 +329,12 @@ public class Taxonomy implements PhylogenyData, MultipleUris, Comparable= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) { - my_gene_trees = new Phylogeny[ 1 + last - first ]; + my_gene_trees = new Phylogeny[ ( 1 + last ) - first ]; int c = 0; for( int i = first; i <= last; ++i ) { my_gene_trees[ c++ ] = gene_trees[ i ]; @@ -346,16 +346,17 @@ public final class RIO { log( "Gene trees analyzed : " + _duplications_stats.getN() ); log( "Mean number of duplications : " + df.format( _duplications_stats.arithmeticMean() ) + " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " (" - + df.format( 100.0 * _duplications_stats.arithmeticMean() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() ) + + "%)" ); if ( _duplications_stats.getN() > 3 ) { log( "Median number of duplications : " + df.format( _duplications_stats.median() ) - + " (" + df.format( 100.0 * _duplications_stats.median() / getIntNodesOfAnalyzedGeneTrees() ) + + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); } log( "Minimum duplications : " + ( int ) _duplications_stats.getMin() + " (" - + df.format( 100.0 * _duplications_stats.getMin() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + + df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); log( "Maximum duplications : " + ( int ) _duplications_stats.getMax() + " (" - + df.format( 100.0 * _duplications_stats.getMax() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); + + df.format( ( 100.0 * _duplications_stats.getMax() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" ); log( "Gene tree internal nodes : " + getIntNodesOfAnalyzedGeneTrees() ); log( "Gene tree external nodes : " + getExtNodesOfAnalyzedGeneTrees() ); } diff --git a/forester/java/src/org/forester/util/ForesterConstants.java b/forester/java/src/org/forester/util/ForesterConstants.java index f030c91..0910285 100644 --- a/forester/java/src/org/forester/util/ForesterConstants.java +++ b/forester/java/src/org/forester/util/ForesterConstants.java @@ -27,8 +27,8 @@ package org.forester.util; public final class ForesterConstants { - public final static String FORESTER_VERSION = "1.012"; - public final static String FORESTER_DATE = "121219"; + public final static String FORESTER_VERSION = "1.013"; + public final static String FORESTER_DATE = "121225"; public final static String PHYLO_XML_VERSION = "1.10"; public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org"; public final static String PHYLO_XML_XSD = "phyloxml.xsd"; @@ -38,7 +38,6 @@ public final class ForesterConstants { public final static String UTF8 = "UTF-8"; public final static String PHYLO_XML_REFERENCE = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356"; public final static boolean RELEASE = false; - public final static boolean TAXONOMY_CODE_STRICT = true; public enum PhylogeneticTreeFormats { -- 1.7.10.2