public class rio {
final static private String PRG_NAME = "rio";
- final static private String PRG_VERSION = "4.000 beta 3";
- final static private String PRG_DATE = "2012.12.19";
+ final static private String PRG_VERSION = "4.000 beta 4";
+ final static private String PRG_DATE = "2012.12.25";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "www.phylosoft.org/forester/";
final static private String HELP_OPTION_1 = "help";
public final static boolean __SYNTH_LF = false; // TODO remove me
public final static boolean ALLOW_DDBJ_BLAST = false;
public final static String PRG_NAME = "Archaeopteryx";
- final static String VERSION = "0.977";
- final static String PRG_DATE = "121210";
+ final static String VERSION = "0.978";
+ final static String PRG_DATE = "121225";
final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file";
final static String[] DEFAULT_FONT_CHOICES = { "Verdana", "Tahoma",
"Arial", "Helvetica", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" };
final PhylogenyNode n = it.next();
if ( n.isExternal() && n.getNodeData().isHasTaxonomy() ) {
final String name = n.getNodeData().getTaxonomy().getScientificName();
- if ( !ForesterUtil.isEmpty( name ) && PhyloXmlUtil.TAXOMONY_CODE_PATTERN_STRICT.matcher( name ).matches() ) {
+ if ( !ForesterUtil.isEmpty( name ) && PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( name ).matches() ) {
n.getNodeData().getTaxonomy().setScientificName( "" );
n.getNodeData().getTaxonomy().setTaxonomyCode( name );
}
public static final String OTHER = "other";
public static final String UNKNOWN = "unknown";
public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,20}" );
- public final static Pattern TAXOMONY_CODE_PATTERN_STRICT = ParserUtils.TAXOMONY_CODE_PATTERN_1;
- public final static Pattern TAXOMONY_CODE_PATTERN_LAX = Pattern.compile( "[A-Z0-9]{3,6}" );
+ public final static Pattern TAXOMONY_CODE_PATTERN = ParserUtils.TAXOMONY_CODE_PATTERN_1;
public final static Pattern LIT_REF_DOI_PATTERN = Pattern
.compile( "[a-zA-Z0-9_\\.]+\\S+" );
public final static Set<String> SEQUENCE_TYPES = new HashSet<String>();
final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
for( final PhylogenyNodeIterator iter = phy.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode n = iter.next();
- if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
+ if ( ( !n.isExternal() ) && ( n.getNumberOfDescendants() == 1 ) ) {
to_delete.add( n );
}
}
if ( !n.getNodeData().isHasTaxonomy() ) {
throw new IllegalArgumentException( "no taxonomic data in node: " + n );
}
- // ref_ext_taxo.add( getSpecies( n ) );
if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
ref_ext_taxo.add( n.getNodeData().getTaxonomy().getScientificName() );
}
if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
ref_ext_taxo.add( n.getNodeData().getTaxonomy().getTaxonomyCode() );
}
+ if ( ( n.getNodeData().getTaxonomy().getIdentifier() != null )
+ && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
+ ref_ext_taxo.add( n.getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() );
+ }
}
final ArrayList<PhylogenyNode> nodes_to_delete = new ArrayList<PhylogenyNode>();
for( final PhylogenyNodeIterator it = to_be_stripped.iteratorExternalForward(); it.hasNext(); ) {
nodes_to_delete.add( n );
}
else if ( !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getScientificName() ) )
- && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) ) {
+ && !( ref_ext_taxo.contains( n.getNodeData().getTaxonomy().getTaxonomyCode() ) )
+ && !( ( n.getNodeData().getTaxonomy().getIdentifier() != null ) && ref_ext_taxo.contains( n
+ .getNodeData().getTaxonomy().getIdentifier().getValuePlusProvider() ) ) ) {
nodes_to_delete.add( n );
}
}
}
/**
 * Sets the taxonomy code after validating its format.
 * <p>
 * A value that {@code ForesterUtil.isEmpty} reports as empty is stored
 * without any format check. Any other value must match
 * {@code PhyloXmlUtil.TAXOMONY_CODE_PATTERN} as a whole (the check uses
 * {@code Matcher.matches()}); otherwise nothing is stored and an exception
 * is thrown.
 *
 * @param taxonomy_code the taxonomy code to store
 * @throws PhyloXmlDataFormatException if {@code taxonomy_code} is not empty
 *             and does not match {@code PhyloXmlUtil.TAXOMONY_CODE_PATTERN}
 */
public void setTaxonomyCode( final String taxonomy_code ) throws PhyloXmlDataFormatException {
- if ( ForesterConstants.TAXONOMY_CODE_STRICT ) {
+
if ( !ForesterUtil.isEmpty( taxonomy_code )
- && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN_STRICT.matcher( taxonomy_code ).matches() ) {
+ && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( taxonomy_code ).matches() ) {
throw new PhyloXmlDataFormatException( "illegal taxonomy code: [" + taxonomy_code + "]" );
}
- }
- else {
- if ( !ForesterUtil.isEmpty( taxonomy_code )
- && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN_LAX.matcher( taxonomy_code ).matches() ) {
- throw new PhyloXmlDataFormatException( "illegal taxonomy code: [" + taxonomy_code + "]" );
- }
- }
+
// Reached only when the code is empty or passed the pattern check above.
_taxonomy_code = taxonomy_code;
}
}
final Phylogeny[] my_gene_trees;
if ( ( first >= 0 ) && ( last >= first ) && ( last < gene_trees.length ) ) {
- my_gene_trees = new Phylogeny[ 1 + last - first ];
+ my_gene_trees = new Phylogeny[ ( 1 + last ) - first ];
int c = 0;
for( int i = first; i <= last; ++i ) {
my_gene_trees[ c++ ] = gene_trees[ i ];
log( "Gene trees analyzed : " + _duplications_stats.getN() );
log( "Mean number of duplications : " + df.format( _duplications_stats.arithmeticMean() )
+ " (sd: " + df.format( _duplications_stats.sampleStandardDeviation() ) + ")" + " ("
- + df.format( 100.0 * _duplications_stats.arithmeticMean() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+ + df.format( ( 100.0 * _duplications_stats.arithmeticMean() ) / getIntNodesOfAnalyzedGeneTrees() )
+ + "%)" );
if ( _duplications_stats.getN() > 3 ) {
log( "Median number of duplications : " + df.format( _duplications_stats.median() )
- + " (" + df.format( 100.0 * _duplications_stats.median() / getIntNodesOfAnalyzedGeneTrees() )
+ + " (" + df.format( ( 100.0 * _duplications_stats.median() ) / getIntNodesOfAnalyzedGeneTrees() )
+ "%)" );
}
log( "Minimum duplications : " + ( int ) _duplications_stats.getMin() + " ("
- + df.format( 100.0 * _duplications_stats.getMin() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+ + df.format( ( 100.0 * _duplications_stats.getMin() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
log( "Maximum duplications : " + ( int ) _duplications_stats.getMax() + " ("
- + df.format( 100.0 * _duplications_stats.getMax() / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
+ + df.format( ( 100.0 * _duplications_stats.getMax() ) / getIntNodesOfAnalyzedGeneTrees() ) + "%)" );
log( "Gene tree internal nodes : " + getIntNodesOfAnalyzedGeneTrees() );
log( "Gene tree external nodes : " + getExtNodesOfAnalyzedGeneTrees() );
}
public final class ForesterConstants {
- public final static String FORESTER_VERSION = "1.012";
- public final static String FORESTER_DATE = "121219";
+ public final static String FORESTER_VERSION = "1.013";
+ public final static String FORESTER_DATE = "121225";
public final static String PHYLO_XML_VERSION = "1.10";
public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org";
public final static String PHYLO_XML_XSD = "phyloxml.xsd";
public final static String UTF8 = "UTF-8";
public final static String PHYLO_XML_REFERENCE = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356";
public final static boolean RELEASE = false;
- public final static boolean TAXONOMY_CODE_STRICT = true;
public enum PhylogeneticTreeFormats {