From: cmzmasek@gmail.com Date: Tue, 18 Oct 2011 22:03:47 +0000 (+0000) Subject: big refactoring (moving of methods) X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=038c34792757a86f24296de5683e722fab3f9307;p=jalview.git big refactoring (moving of methods) --- diff --git a/forester/java/src/org/forester/application/confadd.java b/forester/java/src/org/forester/application/confadd.java index dbc4d50..0a24dbf 100644 --- a/forester/java/src/org/forester/application/confadd.java +++ b/forester/java/src/org/forester/application/confadd.java @@ -33,6 +33,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; @@ -145,7 +146,7 @@ public class confadd { Phylogeny[] evaluators = null; final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); try { - targets = factory.create( target_file, ForesterUtil.createParserDependingOnFileType( target_file, true ) ); + targets = factory.create( target_file, ParserUtils.createParserDependingOnFileType( target_file, true ) ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, @@ -170,7 +171,7 @@ public class confadd { } try { evaluators = factory.create( evaluators_file, - ForesterUtil.createParserDependingOnFileType( evaluators_file, true ) ); + ParserUtils.createParserDependingOnFileType( evaluators_file, true ) ); } catch ( final IOException e ) { ForesterUtil.fatalError( PRG_NAME, "failed to read evaluator topologies from [" + evaluators_file + "]: " diff --git a/forester/java/src/org/forester/application/count_support.java b/forester/java/src/org/forester/application/count_support.java index e094f69..3df54ba 100644 --- a/forester/java/src/org/forester/application/count_support.java +++ b/forester/java/src/org/forester/application/count_support.java @@ -32,6 +32,7 @@ import java.util.Arrays; import java.util.List; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -104,7 +105,7 @@ public class count_support { Phylogeny[] ev = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( phylogeny_infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogeny_infile, true ); p = factory.create( phylogeny_infile, pp )[ 0 ]; } catch ( final Exception e ) { @@ -113,7 +114,7 @@ public class count_support { } try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( evaluators_infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( evaluators_infile, true ); ev = factory.create( evaluators_infile, pp ); } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/application/decorator.java b/forester/java/src/org/forester/application/decorator.java index cec430b..2b711a0 100644 --- a/forester/java/src/org/forester/application/decorator.java +++ b/forester/java/src/org/forester/application/decorator.java @@ -32,6 +32,7 @@ import java.util.List; import java.util.Map; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.data.Identifier; @@ -291,7 +292,7 @@ public final class decorator { Phylogeny[] phylogenies = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( phylogenies_infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true ); phylogenies = factory.create( phylogenies_infile, pp ); } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/application/nhx_too.java b/forester/java/src/org/forester/application/nhx_too.java index 32acbc8..1d212f0 100644 --- a/forester/java/src/org/forester/application/nhx_too.java +++ b/forester/java/src/org/forester/application/nhx_too.java @@ -31,8 +31,10 @@ import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.CommandLineArguments; @@ -79,7 +81,7 @@ public class nhx_too { Phylogeny p = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( phylogeny_infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogeny_infile, true ); p = factory.create( phylogeny_infile, pp )[ 0 ]; } catch ( final Exception e ) { @@ -88,7 +90,7 @@ public class nhx_too { } if ( int_node_name_is_support ) { try { - ForesterUtil.transferInternalNodeNamesToConfidence( p ); + PhylogenyMethods.transferInternalNodeNamesToConfidence( p ); } catch ( final Exception e ) { ForesterUtil.unexpectedFatalError( nhx_too.PRG_NAME, diff --git a/forester/java/src/org/forester/application/pccx.java b/forester/java/src/org/forester/application/pccx.java index 0cb6577..e52c116 100644 --- a/forester/java/src/org/forester/application/pccx.java +++ b/forester/java/src/org/forester/application/pccx.java @@ -33,6 +33,7 @@ import java.util.Arrays; import java.util.List; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.pccx.BasicExternalNodeBasedCoverageExtender; import org.forester.pccx.Coverage; @@ -144,7 +145,7 @@ public class pccx { Phylogeny[] phylogenies = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( phylogenies_infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infile, true ); phylogenies = factory.create( phylogenies_infile, pp ); } catch ( final IOException e ) { diff --git a/forester/java/src/org/forester/application/phyloxml_converter.java b/forester/java/src/org/forester/application/phyloxml_converter.java index d2c0a5c..afa7dd7 100644 --- a/forester/java/src/org/forester/application/phyloxml_converter.java +++ b/forester/java/src/org/forester/application/phyloxml_converter.java @@ -34,6 +34,7 @@ import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -43,7 +44,6 @@ import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; -import org.forester.util.ForesterUtil.PhylogenyNodeField; public class phyloxml_converter { @@ -125,30 +125,30 @@ public class phyloxml_converter { System.exit( -1 ); } final String field_option_value = cla.getOptionValue( FIELD_OPTION ); - PhylogenyNodeField field = null; + PhylogenyMethods.PhylogenyNodeField field = null; if ( field_option_value.equals( FIELD_CLADE_NAME ) ) { - field = PhylogenyNodeField.CLADE_NAME; + field = PhylogenyMethods.PhylogenyNodeField.CLADE_NAME; } else if ( field_option_value.equals( FIELD_TAXONOMY_CODE ) ) { - field = PhylogenyNodeField.TAXONOMY_CODE; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE; } else if ( field_option_value.equals( FIELD_TAXONOMY_SCI_NAME ) ) { - field = PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME; } else if ( field_option_value.equals( FIELD_TAXONOMY_COMM_NAME ) ) { - field = PhylogenyNodeField.TAXONOMY_COMMON_NAME; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME; } else if ( field_option_value.equals( FIELD_SEQUENCE_GENE_NAME ) ) { - field = PhylogenyNodeField.SEQUENCE_NAME; + field = PhylogenyMethods.PhylogenyNodeField.SEQUENCE_NAME; } else if ( field_option_value.equals( FIELD_SEQUENCE_SYMBOL ) ) { - field = PhylogenyNodeField.SEQUENCE_SYMBOL; + field = PhylogenyMethods.PhylogenyNodeField.SEQUENCE_SYMBOL; } else if ( field_option_value.equals( FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 ) ) { - field = PhylogenyNodeField.TAXONOMY_ID_UNIPROT_1; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID_UNIPROT_1; } else if ( field_option_value.equals( FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 ) ) { - field = PhylogenyNodeField.TAXONOMY_ID_UNIPROT_2; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID_UNIPROT_2; } else if ( field_option_value.equals( FIELD_DUMMY ) ) { } @@ -195,23 +195,23 @@ public class phyloxml_converter { Phylogeny[] phys = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser parser = ForesterUtil.createParserDependingOnFileType( infile, true ); + final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( infile, true ); if ( parser instanceof NHXParser ) { - if ( ( field != PhylogenyNodeField.TAXONOMY_CODE ) - && ( field != PhylogenyNodeField.TAXONOMY_COMMON_NAME ) - && ( field != PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) { + if ( ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE ) + && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME ) + && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) { if ( extr_taxonomy_pf_only ) { ( ( NHXParser ) parser ) - .setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); replace_underscores = false; } else if ( extr_taxonomy ) { - ( ( NHXParser ) parser ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.YES ); + ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES ); replace_underscores = false; } } else { - ( ( NHXParser ) parser ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO ); + ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.NO ); } ( ( NHXParser ) parser ).setReplaceUnderscores( replace_underscores ); ( ( NHXParser ) parser ).setIgnoreQuotes( false ); @@ -232,12 +232,12 @@ public class phyloxml_converter { } if ( int_values_are_boots ) { for( final Phylogeny phy : phys ) { - ForesterUtil.transferInternalNamesToBootstrapSupport( phy ); + PhylogenyMethods.transferInternalNamesToBootstrapSupport( phy ); } } if ( field != null ) { for( final Phylogeny phy : phys ) { - ForesterUtil.transferNodeNameToField( phy, field ); + PhylogenyMethods.transferNodeNameToField( phy, field ); } } if ( midpoint_reroot ) { diff --git a/forester/java/src/org/forester/application/printAllSpecies.java b/forester/java/src/org/forester/application/printAllSpecies.java index 3d31929..df7ae17 100644 --- a/forester/java/src/org/forester/application/printAllSpecies.java +++ b/forester/java/src/org/forester/application/printAllSpecies.java @@ -30,12 +30,12 @@ import java.io.FileWriter; import java.io.PrintWriter; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; -import org.forester.util.ForesterUtil; public class printAllSpecies { @@ -53,7 +53,7 @@ public class printAllSpecies { outfile = new File( args[ 1 ] ); try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( infile, true ); tree = factory.create( infile, pp )[ 0 ]; } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/application/printSameOrder.java b/forester/java/src/org/forester/application/printSameOrder.java index b45d132..9004046 100644 --- a/forester/java/src/org/forester/application/printSameOrder.java +++ b/forester/java/src/org/forester/application/printSameOrder.java @@ -30,11 +30,11 @@ import java.io.FileWriter; import java.io.PrintWriter; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; -import org.forester.util.ForesterUtil; public class printSameOrder { @@ -52,7 +52,7 @@ public class printSameOrder { outfile = new File( args[ 1 ] ); try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( infile, true ); tree = factory.create( infile, pp )[ 0 ]; } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/application/strip.java b/forester/java/src/org/forester/application/strip.java index 7af9ff8..4d1c2ad 100644 --- a/forester/java/src/org/forester/application/strip.java +++ b/forester/java/src/org/forester/application/strip.java @@ -29,12 +29,12 @@ import java.io.File; import java.io.IOException; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; -import org.forester.util.ForesterUtil; public class strip { @@ -55,7 +55,7 @@ public class strip { Phylogeny p = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( infile, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( infile, true ); p = factory.create( infile, pp )[ 0 ]; } catch ( final Exception e ) { @@ -109,7 +109,7 @@ public class strip { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final File f = new File( file ); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( f, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( f, true ); p0 = factory.create( f, pp )[ 0 ]; } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/application/support_statistics.java b/forester/java/src/org/forester/application/support_statistics.java index 4235048..a6b6c03 100644 --- a/forester/java/src/org/forester/application/support_statistics.java +++ b/forester/java/src/org/forester/application/support_statistics.java @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.List; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; @@ -176,8 +177,7 @@ public final class support_statistics { for( int i = 0; i < phylogenies_infiles.length; i++ ) { try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil - .createParserDependingOnFileType( phylogenies_infiles[ i ], true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( phylogenies_infiles[ i ], true ); phylogenies[ i ] = factory.create( phylogenies_infiles[ i ], pp )[ 0 ]; } catch ( final IOException e ) { diff --git a/forester/java/src/org/forester/application/support_transfer.java b/forester/java/src/org/forester/application/support_transfer.java index f6e2aed..82817e9 100644 --- a/forester/java/src/org/forester/application/support_transfer.java +++ b/forester/java/src/org/forester/application/support_transfer.java @@ -31,6 +31,7 @@ import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -39,7 +40,6 @@ import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; -import org.forester.util.ForesterUtil; public final class support_transfer { @@ -91,10 +91,10 @@ public final class support_transfer { System.exit( -1 ); } final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp_bl = ForesterUtil.createParserDependingOnFileType( infile_bl, true ); - final PhylogenyParser pp_s = ForesterUtil.createParserDependingOnFileType( infile_support_vals, true ); + final PhylogenyParser pp_bl = ParserUtils.createParserDependingOnFileType( infile_bl, true ); + final PhylogenyParser pp_s = ParserUtils.createParserDependingOnFileType( infile_support_vals, true ); if ( pp_bl instanceof NHXParser ) { - ( ( NHXParser ) pp_bl ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.YES ); + ( ( NHXParser ) pp_bl ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES ); } phylogeny_w_bl = factory.create( infile_bl, pp_bl )[ index_of_tree_w_bl ]; phylogeny_w_support_vals = factory.create( infile_support_vals, pp_s )[ 0 ]; diff --git a/forester/java/src/org/forester/application/surf_paup.java b/forester/java/src/org/forester/application/surf_paup.java index a60d22f..eff3425 100644 --- a/forester/java/src/org/forester/application/surf_paup.java +++ b/forester/java/src/org/forester/application/surf_paup.java @@ -38,8 +38,10 @@ import org.forester.io.parsers.nexus.NexusCharactersParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nexus.PaupLogParser; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.surfacing.DomainParsimonyCalculator; import org.forester.surfacing.SurfacingUtil; import org.forester.util.CommandLineArguments; @@ -114,7 +116,7 @@ public class surf_paup { if ( !phylogeny.isRooted() ) { ForesterUtil.fatalError( PRG_NAME, "phylogeny from [" + surfacing_nexus_outfile + "] is not rooted" ); } - ForesterUtil.postOrderRelabelInternalNodes( phylogeny, phylogeny.getNumberOfExternalNodes() + 1 ); + postOrderRelabelInternalNodes( phylogeny, phylogeny.getNumberOfExternalNodes() + 1 ); CharacterStateMatrix matrix = null; final PaupLogParser paup_log_parser = new PaupLogParser(); try { @@ -158,6 +160,16 @@ public class surf_paup { ForesterUtil.programMessage( PRG_NAME, "OK" ); } + final private static void postOrderRelabelInternalNodes( final Phylogeny phylogeny, final int starting_number ) { + int i = starting_number; + for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) { + final PhylogenyNode node = it.next(); + if ( !node.isExternal() ) { + node.setName( String.valueOf( i++ ) ); + } + } + } + private static void printHelp() { System.out.println(); System.out.println( "Usage:" ); diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index feb122d..42883c6 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -36,6 +36,7 @@ import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; @@ -54,6 +55,7 @@ import org.forester.go.PfamToGoMapping; import org.forester.go.PfamToGoParser; import org.forester.io.parsers.HmmscanPerDomainTableParser; import org.forester.io.parsers.HmmscanPerDomainTableParser.INDIVIDUAL_SCORE_CUTOFF; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -86,6 +88,7 @@ import org.forester.surfacing.Protein; import org.forester.surfacing.ProteinCountsBasedPairwiseDomainSimilarityCalculator; import org.forester.surfacing.Species; import org.forester.surfacing.SurfacingUtil; +import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.BasicTable; import org.forester.util.BasicTableParser; import org.forester.util.CommandLineArguments; @@ -487,7 +490,7 @@ public class surfacing { } try { final Phylogeny[] p_array = ParserBasedPhylogenyFactory.getInstance() - .create( intree_file, ForesterUtil.createParserDependingOnFileType( intree_file, true ) ); + .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) ); if ( p_array.length < 1 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "file [" + intree_file + "] does not contain any phylogeny in phyloXML format" ); @@ -1636,7 +1639,7 @@ public class surfacing { } } // if ( perform_pwc ) { System.out.println(); - html_desc.append( "Command line:" + cla.getCommandLineArgsAsString() + "" + nl ); + html_desc.append( "Command line:\n" + cla.getCommandLineArgsAsString() + "\n" + nl ); System.out.println( "Command line : " + cla.getCommandLineArgsAsString() ); BufferedWriter[] query_domains_writer_ary = null; List[] query_domain_ids_array = null; @@ -1719,6 +1722,39 @@ public class surfacing { catch ( final IOException e2 ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getMessage() ); } + final DescriptiveStatistics all_genomes_domains_per_potein_stats = new BasicDescriptiveStatistics(); + final SortedMap all_genomes_domains_per_potein_histo = new TreeMap(); + final SortedSet domains_which_are_always_single = new TreeSet(); + final SortedSet domains_which_are_sometimes_single_sometimes_not = new TreeSet(); + final SortedSet domains_which_never_single = new TreeSet(); + final BufferedWriter domains_which_are_always_single_writer = null; + final BufferedWriter domains_which_are_sometimes_single_sometimes_not_writer = null; + final BufferedWriter domains_which_never_single_writer = null; + BufferedWriter all_genomes_domains_per_potein_histo_writer = null; + BufferedWriter domains_per_potein_stats_writer = null; + try { + all_genomes_domains_per_potein_histo_writer = new BufferedWriter( new FileWriter( out_dir + + ForesterUtil.FILE_SEPARATOR + output_file + "__all_genomes_domains_per_potein_histo.txt" ) ); + domains_per_potein_stats_writer = new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + + output_file + "__domains_per_potein_stats.txt" ) ); + domains_per_potein_stats_writer.write( "Genome" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( "Mean" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( "SD" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( "Median" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( "N" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( "Min" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( "Max" ); + domains_per_potein_stats_writer.write( "\n" ); + } + catch ( final IOException e3 ) { + e3.printStackTrace(); + } for( int i = 0; i < number_of_genomes; ++i ) { System.out.println(); System.out.println( ( i + 1 ) + "/" + number_of_genomes ); @@ -1865,6 +1901,14 @@ public class surfacing { catch ( final IOException e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, e.toString() ); } + SurfacingUtil.domainsPerProteinsStatistics( input_file_properties[ i ][ 0 ], + protein_list, + all_genomes_domains_per_potein_stats, + all_genomes_domains_per_potein_histo, + domains_which_are_always_single, + domains_which_are_sometimes_single_sometimes_not, + domains_which_never_single, + domains_per_potein_stats_writer ); gwcd_list.add( BasicGenomeWideCombinableDomains .createInstance( protein_list, ignore_combination_with_same, @@ -1928,6 +1972,34 @@ public class surfacing { } ForesterUtil.programMessage( PRG_NAME, "Wrote domain promiscuities to: " + per_genome_domain_promiscuity_statistics_file ); + // + try { + domains_per_potein_stats_writer.write( "ALL" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.arithmeticMean() + "" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.sampleStandardDeviation() + "" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.median() + "" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getN() + "" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getMin() + "" ); + domains_per_potein_stats_writer.write( "\t" ); + domains_per_potein_stats_writer.write( all_genomes_domains_per_potein_stats.getMax() + "" ); + domains_per_potein_stats_writer.write( "\n" ); + domains_per_potein_stats_writer.flush(); + domains_per_potein_stats_writer.close(); + for( final Entry entry : all_genomes_domains_per_potein_histo.entrySet() ) { + all_genomes_domains_per_potein_histo_writer.write( entry.getKey() + "\t" + entry.getValue() + "\n" ); + } + all_genomes_domains_per_potein_histo_writer.flush(); + all_genomes_domains_per_potein_histo_writer.close(); + } + catch ( final IOException e2 ) { + ForesterUtil.fatalError( surfacing.PRG_NAME, e2.getLocalizedMessage() ); + } + // if ( query_domains_writer_ary != null ) { for( int j = 0; j < query_domain_ids_array.length; j++ ) { try { diff --git a/forester/java/src/org/forester/application/surfacing_hmmpfam.java b/forester/java/src/org/forester/application/surfacing_hmmpfam.java index 8a1a107..4104a34 100644 --- a/forester/java/src/org/forester/application/surfacing_hmmpfam.java +++ b/forester/java/src/org/forester/application/surfacing_hmmpfam.java @@ -52,6 +52,7 @@ import org.forester.go.OBOparser; import org.forester.go.PfamToGoMapping; import org.forester.go.PfamToGoParser; import org.forester.io.parsers.HmmPfamOutputParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -495,7 +496,7 @@ public class surfacing_hmmpfam { } try { final Phylogeny[] p_array = ParserBasedPhylogenyFactory.getInstance() - .create( intree_file, ForesterUtil.createParserDependingOnFileType( intree_file, true ) ); + .create( intree_file, ParserUtils.createParserDependingOnFileType( intree_file, true ) ); if ( p_array.length < 1 ) { ForesterUtil.fatalError( surfacing_hmmpfam.PRG_NAME, "file [" + intree_file + "] does not contain any phylogeny in phyloXML format" ); diff --git a/forester/java/src/org/forester/application/ta.java b/forester/java/src/org/forester/application/ta.java index b460916..053b342 100644 --- a/forester/java/src/org/forester/application/ta.java +++ b/forester/java/src/org/forester/application/ta.java @@ -32,6 +32,7 @@ import java.util.Date; import java.util.List; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; @@ -127,7 +128,7 @@ public class ta { } try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( species_tree_file, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( species_tree_file, true ); species_tree = factory.create( species_tree_file, pp )[ 0 ]; } catch ( final IOException e ) { @@ -137,7 +138,7 @@ public class ta { } try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( gene_tree_file, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( gene_tree_file, true ); gene_tree = factory.create( gene_tree_file, pp )[ 0 ]; } catch ( final IOException e ) { diff --git a/forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java b/forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java index 8bd8df5..eddc919 100644 --- a/forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java +++ b/forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java @@ -31,7 +31,9 @@ import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.util.ForesterUtil; // @@ -85,16 +87,16 @@ public final class Archaeopteryx { ForesterUtil.fatalError( Constants.PRG_NAME, err ); } boolean nhx_or_nexus = false; - final PhylogenyParser p = ForesterUtil.createParserDependingOnFileType( f, conf + final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( f, conf .isValidatePhyloXmlAgainstSchema() ); if ( p instanceof NHXParser ) { nhx_or_nexus = true; final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( conf.isReplaceUnderscoresInNhParsing() ); nhx.setIgnoreQuotes( false ); - ForesterUtil.TAXONOMY_EXTRACTION te = ForesterUtil.TAXONOMY_EXTRACTION.NO; + PhylogenyMethods.TAXONOMY_EXTRACTION te = PhylogenyMethods.TAXONOMY_EXTRACTION.NO; if ( conf.isExtractPfamTaxonomyCodesInNhParsing() ) { - te = ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY; + te = PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY; } nhx.setTaxonomyExtraction( te ); } @@ -107,10 +109,10 @@ public final class Archaeopteryx { else if ( p instanceof PhyloXmlParser ) { MainFrameApplication.warnIfNotPhyloXmlValidation( conf ); } - phylogenies = ForesterUtil.readPhylogenies( p, f ); + phylogenies = PhylogenyMethods.readPhylogenies( p, f ); if ( nhx_or_nexus && conf.isInternalNumberAreConfidenceForNhParsing() ) { for( final Phylogeny phy : phylogenies ) { - ForesterUtil.transferInternalNodeNamesToConfidence( phy ); + PhylogenyMethods.transferInternalNodeNamesToConfidence( phy ); } } } diff --git a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java index 1793013..54d8418 100644 --- a/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java +++ b/forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java @@ -26,6 +26,7 @@ import javax.swing.UnsupportedLookAndFeelException; import javax.swing.event.ChangeEvent; import javax.swing.event.ChangeListener; +import org.apache.commons.codec.binary.Base64; import org.forester.archaeopteryx.Options.CLADOGRAM_TYPE; import org.forester.archaeopteryx.Options.NODE_LABEL_DIRECTION; import org.forester.archaeopteryx.Options.PHYLOGENY_GRAPHICS_TYPE; @@ -34,7 +35,6 @@ import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.data.SequenceRelation; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; -import org.apache.commons.codec.binary.Base64; // Use like this: // 0 ) ) { if ( nhx_or_nexus && getOptions().isInternalNumberAreConfidenceForNhParsing() ) { for( final Phylogeny phy : phys ) { - ForesterUtil.transferInternalNodeNamesToConfidence( phy ); + PhylogenyMethods.transferInternalNodeNamesToConfidence( phy ); } } Util.addPhylogeniesToTabs( phys, @@ -2103,7 +2102,7 @@ public final class MainFrameApplication extends MainFrame { if ( ( file != null ) && ( result == JFileChooser.APPROVE_OPTION ) ) { if ( _open_filechooser_for_species_tree.getFileFilter() == MainFrameApplication.xmlfilter ) { try { - final Phylogeny[] trees = ForesterUtil.readPhylogenies( new PhyloXmlParser(), file ); + final Phylogeny[] trees = PhylogenyMethods.readPhylogenies( new PhyloXmlParser(), file ); t = trees[ 0 ]; } catch ( final Exception e ) { @@ -2113,7 +2112,7 @@ public final class MainFrameApplication extends MainFrame { } else if ( _open_filechooser_for_species_tree.getFileFilter() == MainFrameApplication.tolfilter ) { try { - final Phylogeny[] trees = ForesterUtil.readPhylogenies( new TolParser(), file ); + final Phylogeny[] trees = PhylogenyMethods.readPhylogenies( new TolParser(), file ); t = trees[ 0 ]; } catch ( final Exception e ) { @@ -2124,7 +2123,7 @@ public final class MainFrameApplication extends MainFrame { // "*.*": else { try { - final Phylogeny[] trees = ForesterUtil.readPhylogenies( new PhyloXmlParser(), file ); + final Phylogeny[] trees = PhylogenyMethods.readPhylogenies( new PhyloXmlParser(), file ); t = trees[ 0 ]; } catch ( final Exception e ) { @@ -2198,9 +2197,9 @@ public final class MainFrameApplication extends MainFrame { private void setSpecialOptionsForNhxParser( final NHXParser nhx ) { nhx.setReplaceUnderscores( getOptions().isReplaceUnderscoresInNhParsing() ); - ForesterUtil.TAXONOMY_EXTRACTION te = ForesterUtil.TAXONOMY_EXTRACTION.NO; + PhylogenyMethods.TAXONOMY_EXTRACTION te = PhylogenyMethods.TAXONOMY_EXTRACTION.NO; if ( getOptions().isExtractPfamTaxonomyCodesInNhParsing() ) { - te = ForesterUtil.TAXONOMY_EXTRACTION.YES; + te = PhylogenyMethods.TAXONOMY_EXTRACTION.YES; } nhx.setTaxonomyExtraction( te ); } diff --git a/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java b/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java index f4bbca8..5a777ca 100644 --- a/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/NodeEditPanel.java @@ -565,7 +565,7 @@ class NodeEditPanel extends JPanel { } private List obtainPoints() { - ForesterUtil.ensurePresenceOfDistribution( getMyNode() ); + Util.ensurePresenceOfDistribution( getMyNode() ); Distribution d = getMyNode().getNodeData().getDistribution(); if ( d.getPoints() == null ) { d = new Distribution( d.getDesc(), new ArrayList(), d.getPolygons() ); @@ -731,7 +731,7 @@ class NodeEditPanel extends JPanel { } break; case TAXONOMY_CODE: - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); try { getMyNode().getNodeData().getTaxonomy().setTaxonomyCode( value ); } @@ -741,15 +741,15 @@ class NodeEditPanel extends JPanel { } break; case TAXONOMY_SCIENTIFIC_NAME: - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); getMyNode().getNodeData().getTaxonomy().setScientificName( value ); break; case TAXONOMY_COMMON_NAME: - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); getMyNode().getNodeData().getTaxonomy().setCommonName( value ); break; case TAXONOMY_RANK: - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); try { getMyNode().getNodeData().getTaxonomy().setRank( value.toLowerCase() ); } @@ -759,7 +759,7 @@ class NodeEditPanel extends JPanel { } break; case TAXONOMY_AUTHORITY: - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); getMyNode().getNodeData().getTaxonomy().setAuthority( value ); break; case TAXONOMY_URI: { @@ -777,7 +777,7 @@ class NodeEditPanel extends JPanel { } } if ( uri != null ) { - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); } addUri( mtn, uri, number, getMyNode().getNodeData().getTaxonomy() ); break; @@ -788,7 +788,7 @@ class NodeEditPanel extends JPanel { } else if ( getMyNode().getNodeData().getTaxonomy().getSynonyms().size() == number ) { if ( !ForesterUtil.isEmpty( value ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); getMyNode().getNodeData().getTaxonomy().getSynonyms().add( value ); } } @@ -797,7 +797,7 @@ class NodeEditPanel extends JPanel { } break; case TAXONOMY_ID_VALUE: - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); if ( getMyNode().getNodeData().getTaxonomy().getIdentifier() == null ) { getMyNode().getNodeData().getTaxonomy().setIdentifier( new Identifier( value ) ); } @@ -807,7 +807,7 @@ class NodeEditPanel extends JPanel { } break; case TAXONOMY_ID_PROVIDER: - ForesterUtil.ensurePresenceOfTaxonomy( getMyNode() ); + Util.ensurePresenceOfTaxonomy( getMyNode() ); if ( getMyNode().getNodeData().getTaxonomy().getIdentifier() == null ) { getMyNode().getNodeData().getTaxonomy().setIdentifier( new Identifier( "", value ) ); } @@ -817,19 +817,19 @@ class NodeEditPanel extends JPanel { } break; case SEQ_LOCATION: - ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + Util.ensurePresenceOfSequence( getMyNode() ); getMyNode().getNodeData().getSequence().setLocation( value ); break; case SEQ_MOL_SEQ: - ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + Util.ensurePresenceOfSequence( getMyNode() ); getMyNode().getNodeData().getSequence().setMolecularSequence( value ); break; case SEQ_NAME: - ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + Util.ensurePresenceOfSequence( getMyNode() ); getMyNode().getNodeData().getSequence().setName( value ); break; case SEQ_SYMBOL: - ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + Util.ensurePresenceOfSequence( getMyNode() ); try { getMyNode().getNodeData().getSequence().setSymbol( value ); } @@ -839,7 +839,7 @@ class NodeEditPanel extends JPanel { } break; case SEQ_TYPE: - ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + Util.ensurePresenceOfSequence( getMyNode() ); try { getMyNode().getNodeData().getSequence().setType( value.toLowerCase() ); } @@ -849,7 +849,7 @@ class NodeEditPanel extends JPanel { } break; case SEQ_ACC_SOURCE: - ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + Util.ensurePresenceOfSequence( getMyNode() ); if ( getMyNode().getNodeData().getSequence().getAccession() == null ) { getMyNode().getNodeData().getSequence().setAccession( new Accession( "", value ) ); } @@ -859,7 +859,7 @@ class NodeEditPanel extends JPanel { } break; case SEQ_ACC_VALUE: - ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + Util.ensurePresenceOfSequence( getMyNode() ); if ( getMyNode().getNodeData().getSequence().getAccession() == null ) { getMyNode().getNodeData().getSequence().setAccession( new Accession( value, "" ) ); } @@ -883,7 +883,7 @@ class NodeEditPanel extends JPanel { } } if ( uri != null ) { - ForesterUtil.ensurePresenceOfSequence( getMyNode() ); + Util.ensurePresenceOfSequence( getMyNode() ); } addUri( mtn, uri, number, getMyNode().getNodeData().getSequence() ); break; @@ -925,23 +925,23 @@ class NodeEditPanel extends JPanel { getMyNode().getNodeData().getEvent().setGeneLosses( parsePositiveInt( mtn, value ) ); break; case DATE_DESCRIPTION: - ForesterUtil.ensurePresenceOfDate( getMyNode() ); + Util.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setDesc( value ); break; case DATE_MAX: - ForesterUtil.ensurePresenceOfDate( getMyNode() ); + Util.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setMax( parseBigDecimal( mtn, value ) ); break; case DATE_MIN: - ForesterUtil.ensurePresenceOfDate( getMyNode() ); + Util.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setMin( parseBigDecimal( mtn, value ) ); break; case DATE_UNIT: - ForesterUtil.ensurePresenceOfDate( getMyNode() ); + Util.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setUnit( value ); break; case DATE_VALUE: - ForesterUtil.ensurePresenceOfDate( getMyNode() ); + Util.ensurePresenceOfDate( getMyNode() ); getMyNode().getNodeData().getDate().setValue( parseBigDecimal( mtn, value ) ); break; case DIST_ALT: { @@ -960,7 +960,7 @@ class NodeEditPanel extends JPanel { break; } case DIST_DESC: { - ForesterUtil.ensurePresenceOfDistribution( getMyNode() ); + Util.ensurePresenceOfDistribution( getMyNode() ); final Distribution d = getMyNode().getNodeData().getDistribution(); getMyNode().getNodeData().setDistribution( new Distribution( value, d.getPoints(), d.getPolygons() ) ); break; diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index 9d29d9d..32de234 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -267,7 +267,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee _main_panel = tjp; _configuration = configuration; _phylogeny = t; - _phy_has_branch_lengths = ForesterUtil.isHasAtLeastOneBranchLengthLargerThanZero( _phylogeny ); + _phy_has_branch_lengths = Util.isHasAtLeastOneBranchLengthLargerThanZero( _phylogeny ); init(); // if ( !_phylogeny.isEmpty() ) { _phylogeny.recalculateNumberOfExternalDescendants( true ); diff --git a/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java b/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java index 002adad..572a24c 100644 --- a/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java +++ b/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java @@ -42,6 +42,7 @@ import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.tol.TolParser; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; @@ -101,27 +102,27 @@ public class UrlTreeReader implements Runnable { break; case NH: parser = new NHXParser(); - ( ( NHXParser ) parser ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO ); + ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.NO ); ( ( NHXParser ) parser ).setReplaceUnderscores( true ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; case NH_EXTRACT_TAXONOMY: parser = new NHXParser(); ( ( NHXParser ) parser ) - .setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); ( ( NHXParser ) parser ).setReplaceUnderscores( false ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; case PFAM: parser = new NHXParser(); ( ( NHXParser ) parser ) - .setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); ( ( NHXParser ) parser ).setReplaceUnderscores( false ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; case NHX: parser = new NHXParser(); - ( ( NHXParser ) parser ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO ); + ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.NO ); ( ( NHXParser ) parser ).setReplaceUnderscores( false ); ( ( NHXParser ) parser ).setGuessRootedness( true ); break; @@ -183,13 +184,13 @@ public class UrlTreeReader implements Runnable { if ( client.getName().equals( WebserviceUtil.PFAM_NAME ) ) { phylogeny.setRerootable( false ); phylogeny.setRooted( true ); - ForesterUtil.transferInternalNodeNamesToConfidence( phylogeny ); + PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny ); } if ( client.getProcessingInstructions() != null ) { WebserviceUtil.processInstructions( client, phylogeny ); } if ( client.getNodeField() != null ) { - ForesterUtil.transferNodeNameToField( phylogeny, client.getNodeField() ); + PhylogenyMethods.transferNodeNameToField( phylogeny, client.getNodeField() ); } phylogeny.setIdentifier( new Identifier( identifier, client.getName() ) ); _main_frame.getJMenuBar().remove( _main_frame.getHelpMenu() ); diff --git a/forester/java/src/org/forester/archaeopteryx/Util.java b/forester/java/src/org/forester/archaeopteryx/Util.java index 9682e34..c163135 100644 --- a/forester/java/src/org/forester/archaeopteryx/Util.java +++ b/forester/java/src/org/forester/archaeopteryx/Util.java @@ -64,10 +64,13 @@ import org.forester.analysis.AncestralTaxonomyInference; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.io.parsers.tol.TolParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.BranchColor; +import org.forester.phylogeny.data.Distribution; +import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; @@ -85,6 +88,66 @@ public final class Util { Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED ); } + public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasTaxonomy() ) { + node.getNodeData().setTaxonomy( new Taxonomy() ); + } + } + + public static void ensurePresenceOfSequence( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasSequence() ) { + node.getNodeData().setSequence( new Sequence() ); + } + } + + final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasDistribution() ) { + node.getNodeData().setDistribution( new Distribution( "" ) ); + } + } + + final public static void ensurePresenceOfDate( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasDate() ) { + node.getNodeData().setDate( new org.forester.phylogeny.data.Date() ); + } + } + + final static public boolean isHasAtLeastOneBranchWithSupportValues( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + if ( it.next().getBranchData().isHasConfidences() ) { + return true; + } + } + return false; + } + + /** + * Returns true if at least one branch has a length larger than zero. + * + * + * @param phy + */ + final static public boolean isHasAtLeastOneBranchLengthLargerThanZero( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + if ( it.next().getDistanceToParent() > 0.0 ) { + return true; + } + } + return false; + } + + final static public boolean isHasAtLeastNodeWithEvent( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + if ( it.next().getNodeData().isHasEvent() ) { + return true; + } + } + return false; + } + public static MaskFormatter createMaskFormatter( final String s ) { MaskFormatter formatter = null; try { @@ -634,13 +697,13 @@ public final class Util { final ControlPanel atv_control, final Configuration configuration ) { if ( ( t != null ) && !t.isEmpty() ) { - if ( !ForesterUtil.isHasAtLeastOneBranchLengthLargerThanZero( t ) ) { + if ( !Util.isHasAtLeastOneBranchLengthLargerThanZero( t ) ) { atv_control.setDrawPhylogram( false ); atv_control.setDrawPhylogramEnabled( false ); } if ( configuration.doGuessCheckOption( Configuration.display_as_phylogram ) ) { if ( atv_control.getDisplayAsPhylogramCb() != null ) { - if ( ForesterUtil.isHasAtLeastOneBranchLengthLargerThanZero( t ) ) { + if ( Util.isHasAtLeastOneBranchLengthLargerThanZero( t ) ) { atv_control.setDrawPhylogram( true ); atv_control.setDrawPhylogramEnabled( true ); } @@ -651,7 +714,7 @@ public final class Util { } if ( configuration.doGuessCheckOption( Configuration.write_confidence_values ) ) { if ( atv_control.getWriteConfidenceCb() != null ) { - if ( ForesterUtil.isHasAtLeastOneBranchWithSupportValues( t ) ) { + if ( Util.isHasAtLeastOneBranchWithSupportValues( t ) ) { atv_control.setCheckbox( Configuration.write_confidence_values, true ); } else { @@ -661,7 +724,7 @@ public final class Util { } if ( configuration.doGuessCheckOption( Configuration.write_events ) ) { if ( atv_control.getShowEventsCb() != null ) { - if ( ForesterUtil.isHasAtLeastNodeWithEvent( t ) ) { + if ( Util.isHasAtLeastNodeWithEvent( t ) ) { atv_control.setCheckbox( Configuration.write_events, true ); } else { @@ -735,7 +798,7 @@ public final class Util { parser = new TolParser(); } else { - parser = ForesterUtil.createParserDependingOnUrlContents( url, phyloxml_validate_against_xsd ); + parser = ParserUtils.createParserDependingOnUrlContents( url, phyloxml_validate_against_xsd ); } return factory.create( url.openStream(), parser ); } @@ -858,7 +921,7 @@ public final class Util { final TreePanel tree_panel, final ControlPanel ac, final GraphicsExportType type, - final Options options ) throws IOException{ + final Options options ) throws IOException { if ( !options.isGraphicsExportUsingActualSize() ) { if ( options.isGraphicsExportVisibleOnly() ) { throw new IllegalArgumentException( "cannot export visible rectangle only without exporting in actual size" ); @@ -916,7 +979,7 @@ public final class Util { } return msg; } - + final static void writeToTiff( final File file, final BufferedImage image ) throws IOException { // See: http://log.robmeek.com/2005/08/write-tiff-in-java.html ImageWriter writer = null; diff --git a/forester/java/src/org/forester/archaeopteryx/webservices/BasicPhylogeniesWebserviceClient.java b/forester/java/src/org/forester/archaeopteryx/webservices/BasicPhylogeniesWebserviceClient.java index f970c64..2187a12 100644 --- a/forester/java/src/org/forester/archaeopteryx/webservices/BasicPhylogeniesWebserviceClient.java +++ b/forester/java/src/org/forester/archaeopteryx/webservices/BasicPhylogeniesWebserviceClient.java @@ -26,27 +26,27 @@ package org.forester.archaeopteryx.webservices; import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat; -import org.forester.util.ForesterUtil.PhylogenyNodeField; +import org.forester.phylogeny.PhylogenyMethods; public class BasicPhylogeniesWebserviceClient implements PhylogeniesWebserviceClient { - private final String _desc; - private final String _instructions; - private final String _menu_name; - private final String _name; - private final WsPhylogenyFormat _format; - private final String _url; - private final boolean _integer; - private final PhylogenyNodeField _node_field; - private final Object _proc_inst; - private final String _ref; + private final String _desc; + private final String _instructions; + private final String _menu_name; + private final String _name; + private final WsPhylogenyFormat _format; + private final String _url; + private final boolean _integer; + private final PhylogenyMethods.PhylogenyNodeField _node_field; + private final Object _proc_inst; + private final String _ref; public BasicPhylogeniesWebserviceClient( final String name, final String menu_name, final String desc, final String instructions, final WsPhylogenyFormat format, - final PhylogenyNodeField node_field, + final PhylogenyMethods.PhylogenyNodeField node_field, final String url, final boolean integer, final String ref, @@ -85,7 +85,7 @@ public class BasicPhylogeniesWebserviceClient implements PhylogeniesWebserviceCl } @Override - public PhylogenyNodeField getNodeField() { + public PhylogenyMethods.PhylogenyNodeField getNodeField() { return _node_field; } diff --git a/forester/java/src/org/forester/archaeopteryx/webservices/PhylogeniesWebserviceClient.java b/forester/java/src/org/forester/archaeopteryx/webservices/PhylogeniesWebserviceClient.java index 100bc22..0d8b09c 100644 --- a/forester/java/src/org/forester/archaeopteryx/webservices/PhylogeniesWebserviceClient.java +++ b/forester/java/src/org/forester/archaeopteryx/webservices/PhylogeniesWebserviceClient.java @@ -25,7 +25,7 @@ package org.forester.archaeopteryx.webservices; import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat; -import org.forester.util.ForesterUtil.PhylogenyNodeField; +import org.forester.phylogeny.PhylogenyMethods; /* * Webservices which return phylogenies. @@ -69,7 +69,7 @@ public interface PhylogeniesWebserviceClient { * * @return the field code */ - public PhylogenyNodeField getNodeField(); + public PhylogenyMethods.PhylogenyNodeField getNodeField(); /** * This is used to indicate any kind of special processing. diff --git a/forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java b/forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java index 1918587..71c047a 100644 --- a/forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/webservices/WebserviceUtil.java @@ -30,6 +30,7 @@ import java.util.List; import org.forester.archaeopteryx.webservices.WebservicesManager.WsPhylogenyFormat; import org.forester.io.parsers.phyloxml.PhyloXmlUtil; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; import org.forester.phylogeny.data.Identifier; @@ -37,7 +38,6 @@ import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.phylogeny.iterators.PreorderTreeIterator; import org.forester.util.ForesterUtil; -import org.forester.util.ForesterUtil.PhylogenyNodeField; public final class WebserviceUtil { @@ -60,7 +60,7 @@ public final class WebserviceUtil { "Please enter a Tree of Life node identifier\n(Examples: " + "19386 for Cephalopoda, 2461 for Cnidaria, 2466 for Deuterostomia)", WsPhylogenyFormat.TOL_XML_RESPONSE, - PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, + PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, WebserviceUtil.TOL_WEBSERVER, true, "http://tolweb.org", @@ -70,7 +70,7 @@ public final class WebserviceUtil { "Use TreeBASE to obtain a phylogeny", "Please enter a TreeBASE tree identifier\n(Examples: 2654, 825, 4931, 2518, 2406, 4934)", WsPhylogenyFormat.NEXUS, - PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, + PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME, "http://purl.org/phylo/treebase/phylows/tree/TB2:Tr" + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER + "?format=nexus", diff --git a/forester/java/src/org/forester/io/parsers/FastaParser.java b/forester/java/src/org/forester/io/parsers/FastaParser.java index 7b7bfac..b99a4d4 100644 --- a/forester/java/src/org/forester/io/parsers/FastaParser.java +++ b/forester/java/src/org/forester/io/parsers/FastaParser.java @@ -38,6 +38,7 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.forester.archaeopteryx.Util; import org.forester.msa.BasicMsa; import org.forester.msa.Msa; import org.forester.msa.MsaFormatException; @@ -202,15 +203,15 @@ public class FastaParser { final String seq_name = name_m.group( 3 ); final String tax_sn = name_m.group( 4 ); if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); + Util.ensurePresenceOfSequence( node ); node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) ); } if ( !ForesterUtil.isEmpty( seq_name ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); + Util.ensurePresenceOfSequence( node ); node.getNodeData().getSequence( 0 ).setName( seq_name ); } if ( !ForesterUtil.isEmpty( tax_sn ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn ); } } diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java index 4279ade..0abd855 100644 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java +++ b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java @@ -41,6 +41,7 @@ import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; @@ -72,7 +73,7 @@ public class NexusPhylogeniesParser implements PhylogenyParser { final boolean is_rooted ) throws IOException { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); final NHXParser pars = new NHXParser(); - pars.setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO ); + pars.setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.NO ); pars.setReplaceUnderscores( isReplaceUnderscores() ); pars.setIgnoreQuotes( isIgnoreQuotes() ); if ( rooted_info_present ) { diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index cdde33d..47771ce 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -39,6 +39,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; @@ -57,34 +58,34 @@ import org.forester.util.ForesterUtil; public final class NHXParser implements PhylogenyParser { - public static final boolean LIMIT_SPECIES_NAMES_TO_FIVE_CHARS = true; - public static final ForesterUtil.TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = ForesterUtil.TAXONOMY_EXTRACTION.NO; - final static private boolean GUESS_ROOTEDNESS_DEFAULT = true; - final static private boolean GUESS_IF_SUPPORT_VALUES = true; - final static private boolean IGNORE_QUOTES_DEFAULT = false; - final static public boolean REPLACE_UNDERSCORES_DEFAULT = false; - private boolean _saw_closing_paren; - final static private byte STRING = 0; - final static private byte STRING_BUFFER = 1; - final static private byte CHAR_ARRAY = 2; - final static private byte BUFFERED_READER = 3; - private boolean _guess_rootedness; - private boolean _has_next; - private boolean _ignore_quotes; - private byte _input_type; - private int _source_length; - private PhylogenyNode _current_node; - private StringBuilder _current_anotation; - private Object _nhx_source; - private int _clade_level; - private List _phylogenies; - private Phylogeny _current_phylogeny; - private ForesterUtil.TAXONOMY_EXTRACTION _taxonomy_extraction; - private boolean _replace_underscores; - public final static Pattern UC_LETTERS_NUMBERS_PATTERN = Pattern - .compile( "^[A-Z0-9]+$" ); - public final static Pattern NUMBERS_ONLY_PATTERN = Pattern - .compile( "^[0-9]+$" ); + public static final boolean LIMIT_SPECIES_NAMES_TO_FIVE_CHARS = true; + public static final PhylogenyMethods.TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = PhylogenyMethods.TAXONOMY_EXTRACTION.NO; + final static private boolean GUESS_ROOTEDNESS_DEFAULT = true; + final static private boolean GUESS_IF_SUPPORT_VALUES = true; + final static private boolean IGNORE_QUOTES_DEFAULT = false; + final static public boolean REPLACE_UNDERSCORES_DEFAULT = false; + private boolean _saw_closing_paren; + final static private byte STRING = 0; + final static private byte STRING_BUFFER = 1; + final static private byte CHAR_ARRAY = 2; + final static private byte BUFFERED_READER = 3; + private boolean _guess_rootedness; + private boolean _has_next; + private boolean _ignore_quotes; + private byte _input_type; + private int _source_length; + private PhylogenyNode _current_node; + private StringBuilder _current_anotation; + private Object _nhx_source; + private int _clade_level; + private List _phylogenies; + private Phylogeny _current_phylogeny; + private PhylogenyMethods.TAXONOMY_EXTRACTION _taxonomy_extraction; + private boolean _replace_underscores; + public final static Pattern UC_LETTERS_NUMBERS_PATTERN = Pattern + .compile( "^[A-Z0-9]+$" ); + public final static Pattern NUMBERS_ONLY_PATTERN = Pattern + .compile( "^[0-9]+$" ); public NHXParser() { init(); @@ -187,7 +188,7 @@ public final class NHXParser implements PhylogenyParser { return _source_length; } - public ForesterUtil.TAXONOMY_EXTRACTION getTaxonomyExtraction() { + public PhylogenyMethods.TAXONOMY_EXTRACTION getTaxonomyExtraction() { return _taxonomy_extraction; } @@ -588,7 +589,7 @@ public final class NHXParser implements PhylogenyParser { _source_length = source_length; } - public void setTaxonomyExtraction( final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction ) { + public void setTaxonomyExtraction( final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction ) { _taxonomy_extraction = taxonomy_extraction; } @@ -628,9 +629,9 @@ public final class NHXParser implements PhylogenyParser { public static void parseNHX( String s, final PhylogenyNode node_to_annotate, - final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction, + final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores ) throws NHXFormatException { - if ( ( taxonomy_extraction != ForesterUtil.TAXONOMY_EXTRACTION.NO ) && replace_underscores ) { + if ( ( taxonomy_extraction != PhylogenyMethods.TAXONOMY_EXTRACTION.NO ) && replace_underscores ) { throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" ); } if ( ( s != null ) && ( s.length() > 0 ) ) { @@ -674,8 +675,8 @@ public final class NHXParser implements PhylogenyParser { if ( !s.startsWith( ":" ) ) { node_to_annotate.setName( t.nextToken() ); if ( !replace_underscores - && ( !is_nhx && ( taxonomy_extraction != ForesterUtil.TAXONOMY_EXTRACTION.NO ) ) ) { - final String tax = ForesterUtil + && ( !is_nhx && ( taxonomy_extraction != PhylogenyMethods.TAXONOMY_EXTRACTION.NO ) ) ) { + final String tax = ParserUtils .extractTaxonomyCodeFromNodeName( node_to_annotate.getName(), LIMIT_SPECIES_NAMES_TO_FIVE_CHARS, taxonomy_extraction ); diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index bb8b6c9..b8dd42e 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -34,9 +34,193 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; +import java.net.URL; +import java.util.regex.Matcher; + +import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.nexus.NexusPhylogeniesParser; +import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.phyloxml.PhyloXmlParser; +import org.forester.io.parsers.tol.TolParser; +import org.forester.phylogeny.PhylogenyMethods; +import org.forester.util.ForesterConstants; +import org.forester.util.ForesterUtil; public final class ParserUtils { + final public static PhylogenyParser createParserDependingOnUrlContents( final URL url, + final boolean phyloxml_validate_against_xsd ) + throws FileNotFoundException, IOException { + final String lc_filename = url.getFile().toString().toLowerCase(); + PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd ); + if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) { + if ( parser instanceof PhyloXmlParser ) { + ( ( PhyloXmlParser ) parser ).setZippedInputstream( true ); + } + else if ( parser instanceof TolParser ) { + ( ( TolParser ) parser ).setZippedInputstream( true ); + } + } + if ( parser == null ) { + final String first_line = ForesterUtil.getFirstLine( url ).trim().toLowerCase(); + if ( first_line.startsWith( "<" ) ) { + parser = new PhyloXmlParser(); + if ( phyloxml_validate_against_xsd ) { + final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); + final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); + if ( xsd_url != null ) { + ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); + } + else { + throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" + + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); + } + } + } + else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) + || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { + parser = new NexusPhylogeniesParser(); + } + else { + parser = new NHXParser(); + } + } + return parser; + } + + /** + * Return null if it can not guess the parser to use based on name suffix. + * + * @param filename + * @return + */ + final public static PhylogenyParser createParserDependingOnSuffix( final String filename, + final boolean phyloxml_validate_against_xsd ) { + PhylogenyParser parser = null; + final String filename_lc = filename.toLowerCase(); + if ( filename_lc.endsWith( ".tol" ) || filename_lc.endsWith( ".tolxml" ) || filename_lc.endsWith( ".tol.zip" ) ) { + parser = new TolParser(); + } + else if ( filename_lc.endsWith( ".xml" ) || filename_lc.endsWith( ".px" ) || filename_lc.endsWith( "phyloxml" ) + || filename_lc.endsWith( ".zip" ) ) { + parser = new PhyloXmlParser(); + if ( phyloxml_validate_against_xsd ) { + final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); + final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); + if ( xsd_url != null ) { + ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); + } + else { + if ( ForesterConstants.RELEASE ) { + throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" + + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); + } + } + } + } + else if ( filename_lc.endsWith( ".nexus" ) || filename_lc.endsWith( ".nex" ) || filename_lc.endsWith( ".nx" ) ) { + parser = new NexusPhylogeniesParser(); + } + else if ( filename_lc.endsWith( ".nhx" ) || filename_lc.endsWith( ".nh" ) || filename_lc.endsWith( ".newick" ) ) { + parser = new NHXParser(); + } + return parser; + } + + final public static PhylogenyParser createParserDependingOnFileType( final File file, + final boolean phyloxml_validate_against_xsd ) + throws FileNotFoundException, IOException { + PhylogenyParser parser = null; + parser = ParserUtils.createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd ); + if ( parser == null ) { + parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd ); + } + return parser; + } + + final public static PhylogenyParser createParserDependingFileContents( final File file, + final boolean phyloxml_validate_against_xsd ) + throws FileNotFoundException, IOException { + PhylogenyParser parser = null; + final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase(); + if ( first_line.startsWith( "<" ) ) { + parser = new PhyloXmlParser(); + if ( phyloxml_validate_against_xsd ) { + final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); + final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); + if ( xsd_url != null ) { + ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); + } + else { + if ( ForesterConstants.RELEASE ) { + throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" + + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); + } + } + } + } + else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) + || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { + parser = new NexusPhylogeniesParser(); + } + else { + parser = new NHXParser(); + } + return parser; + } + + /** + * Extracts a code if and only if: + * one and only one _, + * shorter than 25, + * no |, + * no ., + * if / present it has to be after the _, + * if PFAM_STYLE_ONLY: / must be present, + * tax code can only contain uppercase letters and numbers, + * and must contain at least one uppercase letter. + * Return null if no code extractable. + * + * @param name + * @param limit_to_five + * @return + */ + public static String extractTaxonomyCodeFromNodeName( final String name, + final boolean limit_to_five, + final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction ) { + if ( ( name.indexOf( "_" ) > 0 ) + && ( name.length() < 25 ) + && ( name.lastIndexOf( "_" ) == name.indexOf( "_" ) ) + && ( name.indexOf( "|" ) < 0 ) + && ( name.indexOf( "." ) < 0 ) + && ( ( taxonomy_extraction != PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name + .indexOf( "/" ) >= 0 ) ) + && ( ( ( name.indexOf( "/" ) ) < 0 ) || ( name.indexOf( "/" ) > name.indexOf( "_" ) ) ) ) { + final String[] s = name.split( "[_/]" ); + if ( s.length > 1 ) { + String str = s[ 1 ]; + if ( limit_to_five ) { + if ( str.length() > 5 ) { + str = str.substring( 0, 5 ); + } + else if ( ( str.length() < 5 ) && ( str.startsWith( "RAT" ) || str.startsWith( "PIG" ) ) ) { + str = str.substring( 0, 3 ); + } + } + final Matcher letters_and_numbers = NHXParser.UC_LETTERS_NUMBERS_PATTERN.matcher( str ); + if ( !letters_and_numbers.matches() ) { + return null; + } + final Matcher numbers_only = NHXParser.NUMBERS_ONLY_PATTERN.matcher( str ); + if ( numbers_only.matches() ) { + return null; + } + return str; + } + } + return null; + } + public static BufferedReader createReader( final Object source ) throws IOException, FileNotFoundException { BufferedReader reader = null; if ( ( source instanceof File ) || ( source instanceof String ) ) { diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 2b7fd29..0d404e8 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -26,6 +26,8 @@ package org.forester.phylogeny; import java.awt.Color; +import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -35,11 +37,18 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; +import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.phyloxml.PhyloXmlUtil; +import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.data.BranchColor; import org.forester.phylogeny.data.BranchWidth; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.DomainArchitecture; +import org.forester.phylogeny.data.Identifier; +import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.BasicDescriptiveStatistics; import org.forester.util.DescriptiveStatistics; @@ -170,6 +179,145 @@ public class PhylogenyMethods { return !obtainLCA( node1, node2 ).isDuplication(); } + public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny[] trees = factory.create( file, parser ); + if ( ( trees == null ) || ( trees.length == 0 ) ) { + throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + } + return trees; + } + + final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isRoot() && !n.isExternal() && !n.getBranchData().isHasConfidences() ) { + if ( !ForesterUtil.isEmpty( n.getName() ) ) { + double d = -1.0; + try { + d = Double.parseDouble( n.getName() ); + } + catch ( final Exception e ) { + d = -1.0; + } + if ( d >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( d, "" ) ); + n.setName( "" ); + } + } + } + } + } + + final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { + double value = -1; + try { + value = Double.parseDouble( n.getName() ); + } + catch ( final NumberFormatException e ) { + throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " + + e.getLocalizedMessage() ); + } + if ( value >= 0.0 ) { + n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); + n.setName( "" ); + } + } + } + } + + final static public void transferNodeNameToField( final Phylogeny phy, + final PhylogenyMethods.PhylogenyNodeField field ) { + final PhylogenyNodeIterator it = phy.iteratorPostorder(); + while ( it.hasNext() ) { + final PhylogenyNode n = it.next(); + final String name = n.getName().trim(); + if ( !ForesterUtil.isEmpty( name ) ) { + switch ( field ) { + case TAXONOMY_CODE: + //temp hack + // if ( name.length() > 5 ) { + // n.setName( "" ); + // if ( !n.getNodeData().isHasTaxonomy() ) { + // n.getNodeData().setTaxonomy( new Taxonomy() ); + // } + // n.getNodeData().getTaxonomy().setScientificName( name ); + // break; + // } + // + n.setName( "" ); + setTaxonomyCode( n, name ); + break; + case TAXONOMY_SCIENTIFIC_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setScientificName( name ); + break; + case TAXONOMY_COMMON_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + n.getNodeData().getTaxonomy().setCommonName( name ); + break; + case SEQUENCE_SYMBOL: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setSymbol( name ); + break; + case SEQUENCE_NAME: + n.setName( "" ); + if ( !n.getNodeData().isHasSequence() ) { + n.getNodeData().setSequence( new Sequence() ); + } + n.getNodeData().getSequence().setName( name ); + break; + case TAXONOMY_ID_UNIPROT_1: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( 0, i ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + case TAXONOMY_ID_UNIPROT_2: { + if ( !n.getNodeData().isHasTaxonomy() ) { + n.getNodeData().setTaxonomy( new Taxonomy() ); + } + String id = name; + final int i = name.indexOf( '_' ); + if ( i > 0 ) { + id = name.substring( i + 1, name.length() ); + } + else { + n.setName( "" ); + } + n.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); + break; + } + } + } + } + } + static double addPhylogenyDistances( final double a, final double b ) { if ( ( a >= 0.0 ) && ( b >= 0.0 ) ) { return a + b; @@ -1250,4 +1398,19 @@ public class PhylogenyMethods { } return nodes_to_delete.size(); } + + public static enum PhylogenyNodeField { + CLADE_NAME, + TAXONOMY_CODE, + TAXONOMY_SCIENTIFIC_NAME, + TAXONOMY_COMMON_NAME, + SEQUENCE_SYMBOL, + SEQUENCE_NAME, + TAXONOMY_ID_UNIPROT_1, + TAXONOMY_ID_UNIPROT_2; + } + + public static enum TAXONOMY_EXTRACTION { + NO, YES, PFAM_STYLE_ONLY; + } } diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java index 7c272e6..7bf60ef 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java @@ -1049,24 +1049,24 @@ public class PhylogenyNode implements PhylogenyNodeI, Comparable } public static PhylogenyNode createInstanceFromNhxString( final String nhx ) throws NHXFormatException { - return new PhylogenyNode( nhx, ForesterUtil.TAXONOMY_EXTRACTION.NO, false ); + return new PhylogenyNode( nhx, PhylogenyMethods.TAXONOMY_EXTRACTION.NO, false ); } public static PhylogenyNode createInstanceFromNhxString( final String nhx, - final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction ) + final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction ) throws NHXFormatException { return new PhylogenyNode( nhx, taxonomy_extraction, false ); } public static PhylogenyNode createInstanceFromNhxString( final String nhx, - final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction, + final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores ) throws NHXFormatException { return new PhylogenyNode( nhx, taxonomy_extraction, replace_underscores ); } private PhylogenyNode( final String nhx, - final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction, + final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores ) throws NHXFormatException { init(); NHXParser.parseNHX( nhx, this, taxonomy_extraction, replace_underscores ); diff --git a/forester/java/src/org/forester/sdi/DistanceCalculator.java b/forester/java/src/org/forester/sdi/DistanceCalculator.java index dc72099..eb592d8 100644 --- a/forester/java/src/org/forester/sdi/DistanceCalculator.java +++ b/forester/java/src/org/forester/sdi/DistanceCalculator.java @@ -33,11 +33,11 @@ import java.util.ListIterator; import java.util.Vector; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; -import org.forester.util.ForesterUtil; /* * @author Christian M. Zmasek @@ -480,7 +480,7 @@ public class DistanceCalculator { tree_file = new File( args[ 0 ] ); try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( tree_file, true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( tree_file, true ); tree = factory.create( tree_file, pp )[ 0 ]; } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/sdi/ORcount.java b/forester/java/src/org/forester/sdi/ORcount.java index a64fbad..567a67c 100644 --- a/forester/java/src/org/forester/sdi/ORcount.java +++ b/forester/java/src/org/forester/sdi/ORcount.java @@ -33,6 +33,7 @@ import java.util.HashMap; import java.util.List; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; @@ -350,7 +351,7 @@ public class ORcount { for( int i = 0; i < trees.length; ++i ) { try { System.out.println( "Reading tree #" + i + " [" + args[ i ] + "]" ); - final PhylogenyParser pp = ForesterUtil.createParserDependingOnFileType( new File( args[ i ] ), true ); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( new File( args[ i ] ), true ); trees[ i ] = factory.create( new File( args[ i ] ), pp )[ 0 ]; } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 60cf2f4..e1ee0ab 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -2285,4 +2285,72 @@ public final class SurfacingUtil { domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence(), phylogeny ); } + + public static void domainsPerProteinsStatistics( final String genome, + final List protein_list, + final DescriptiveStatistics all_genomes_domains_per_potein_stats, + final SortedMap all_genomes_domains_per_potein_histo, + final SortedSet domains_which_are_always_single, + final SortedSet domains_which_are_sometimes_single_sometimes_not, + final SortedSet domains_which_never_single, + final Writer writer ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final Protein protein : protein_list ) { + final int domains = protein.getNumberOfProteinDomains(); + stats.addValue( domains ); + all_genomes_domains_per_potein_stats.addValue( domains ); + if ( !all_genomes_domains_per_potein_histo.containsKey( domains ) ) { + all_genomes_domains_per_potein_histo.put( domains, 1 ); + } + else { + all_genomes_domains_per_potein_histo.put( domains, + 1 + all_genomes_domains_per_potein_histo.get( domains ) ); + } + if ( domains == 1 ) { + final String domain = protein.getProteinDomain( 0 ).getDomainId().getId(); + if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) { + if ( domains_which_never_single.contains( domain ) ) { + domains_which_never_single.remove( domain ); + domains_which_are_sometimes_single_sometimes_not.add( domain ); + } + else { + domains_which_are_always_single.add( domain ); + } + } + } + else if ( domains > 1 ) { + for( final Domain d : protein.getProteinDomains() ) { + final String domain = d.getDomainId().getId(); + if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) { + if ( domains_which_are_always_single.contains( domain ) ) { + domains_which_are_always_single.remove( domain ); + domains_which_are_sometimes_single_sometimes_not.add( domain ); + } + else { + domains_which_never_single.add( domain ); + } + } + } + } + } + try { + writer.write( genome ); + writer.write( "\t" ); + writer.write( stats.arithmeticMean() + "" ); + writer.write( "\t" ); + writer.write( stats.sampleStandardDeviation() + "" ); + writer.write( "\t" ); + writer.write( stats.median() + "" ); + writer.write( "\t" ); + writer.write( stats.getN() + "" ); + writer.write( "\t" ); + writer.write( stats.getMin() + "" ); + writer.write( "\t" ); + writer.write( stats.getMax() + "" ); + writer.write( "\n" ); + } + catch ( final IOException e ) { + e.printStackTrace(); + } + } } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 78fcc21..8b440a3 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -794,11 +794,11 @@ public final class Test { } final PhylogenyNode n1 = new PhylogenyNode(); final PhylogenyNode n2 = PhylogenyNode - .createInstanceFromNhxString( "", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); final PhylogenyNode n3 = PhylogenyNode - .createInstanceFromNhxString( "n3", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n3", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); final PhylogenyNode n4 = PhylogenyNode - .createInstanceFromNhxString( "n4:0.01", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n4:0.01", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( n1.isHasAssignedEvent() ) { return false; } @@ -4209,7 +4209,7 @@ public final class Test { return false; } final NHXParser nhxp = new NHXParser(); - nhxp.setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO ); + nhxp.setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.NO ); nhxp.setReplaceUnderscores( true ); final Phylogeny uc0 = factory.create( "(A__A_,_B_B)", nhxp )[ 0 ]; if ( !uc0.getRoot().getChildNode( 0 ).getName().equals( "A A " ) ) { @@ -4535,7 +4535,8 @@ public final class Test { return false; } final PhylogenyNode n8 = PhylogenyNode - .createInstanceFromNhxString( "n8_ECOLI/12:0.01", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n8_ECOLI/12:0.01", + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n8.getName().equals( "n8_ECOLI/12" ) ) { return false; } @@ -4544,7 +4545,7 @@ public final class Test { } final PhylogenyNode n9 = PhylogenyNode .createInstanceFromNhxString( "n9_ECOLI/12=12:0.01", - ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n9.getName().equals( "n9_ECOLI/12=12" ) ) { return false; } @@ -4552,20 +4553,20 @@ public final class Test { return false; } final PhylogenyNode n10 = PhylogenyNode - .createInstanceFromNhxString( "n10.ECOLI", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10.ECOLI", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n10.getName().equals( "n10.ECOLI" ) ) { return false; } final PhylogenyNode n20 = PhylogenyNode - .createInstanceFromNhxString( "n20_ECOLI/1-2", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n20_ECOLI/1-2", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n20.getName().equals( "n20_ECOLI/1-2" ) ) { return false; } if ( !PhylogenyMethods.getSpecies( n20 ).equals( "ECOLI" ) ) { return false; } - final PhylogenyNode n20x = PhylogenyNode.createInstanceFromNhxString( "n20_ECOL1/1-2", - ForesterUtil.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n20x = PhylogenyNode + .createInstanceFromNhxString( "n20_ECOL1/1-2", PhylogenyMethods.TAXONOMY_EXTRACTION.YES ); if ( !n20x.getName().equals( "n20_ECOL1/1-2" ) ) { return false; } @@ -4573,7 +4574,7 @@ public final class Test { return false; } final PhylogenyNode n20xx = PhylogenyNode - .createInstanceFromNhxString( "n20_eCOL1/1-2", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n20_eCOL1/1-2", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n20xx.getName().equals( "n20_eCOL1/1-2" ) ) { return false; } @@ -4581,7 +4582,7 @@ public final class Test { return false; } final PhylogenyNode n20xxx = PhylogenyNode - .createInstanceFromNhxString( "n20_ecoli/1-2", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n20_ecoli/1-2", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n20xxx.getName().equals( "n20_ecoli/1-2" ) ) { return false; } @@ -4589,15 +4590,15 @@ public final class Test { return false; } final PhylogenyNode n20xxxx = PhylogenyNode - .createInstanceFromNhxString( "n20_Ecoli/1-2", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n20_Ecoli/1-2", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n20xxxx.getName().equals( "n20_Ecoli/1-2" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n20xxxx ).length() > 0 ) { return false; } - final PhylogenyNode n21 = PhylogenyNode.createInstanceFromNhxString( "n21_PIG", - ForesterUtil.TAXONOMY_EXTRACTION.YES ); + final PhylogenyNode n21 = PhylogenyNode + .createInstanceFromNhxString( "n21_PIG", PhylogenyMethods.TAXONOMY_EXTRACTION.YES ); if ( !n21.getName().equals( "n21_PIG" ) ) { return false; } @@ -4605,7 +4606,7 @@ public final class Test { return false; } final PhylogenyNode n21x = PhylogenyNode - .createInstanceFromNhxString( "n21_PIG", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n21_PIG", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n21x.getName().equals( "n21_PIG" ) ) { return false; } @@ -4613,7 +4614,7 @@ public final class Test { return false; } final PhylogenyNode n22 = PhylogenyNode - .createInstanceFromNhxString( "n22/PIG", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n22/PIG", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n22.getName().equals( "n22/PIG" ) ) { return false; } @@ -4621,7 +4622,7 @@ public final class Test { return false; } final PhylogenyNode n23 = PhylogenyNode - .createInstanceFromNhxString( "n23/PIG_1", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n23/PIG_1", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n23.getName().equals( "n23/PIG_1" ) ) { return false; } @@ -4630,7 +4631,8 @@ public final class Test { } if ( NHXParser.LIMIT_SPECIES_NAMES_TO_FIVE_CHARS ) { final PhylogenyNode a = PhylogenyNode - .createInstanceFromNhxString( "n10_ECOLI/1-2", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10_ECOLI/1-2", + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !a.getName().equals( "n10_ECOLI/1-2" ) ) { return false; } @@ -4639,7 +4641,7 @@ public final class Test { } final PhylogenyNode b = PhylogenyNode .createInstanceFromNhxString( "n10_ECOLI1/1-2", - ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !b.getName().equals( "n10_ECOLI1/1-2" ) ) { return false; } @@ -4648,7 +4650,7 @@ public final class Test { } final PhylogenyNode c = PhylogenyNode .createInstanceFromNhxString( "n10_RATAF12/1000-2000", - ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !c.getName().equals( "n10_RATAF12/1000-2000" ) ) { return false; } @@ -4656,7 +4658,8 @@ public final class Test { return false; } final PhylogenyNode d = PhylogenyNode - .createInstanceFromNhxString( "n10_RAT1/1-2", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10_RAT1/1-2", + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !d.getName().equals( "n10_RAT1/1-2" ) ) { return false; } @@ -4664,7 +4667,7 @@ public final class Test { return false; } final PhylogenyNode e = PhylogenyNode - .createInstanceFromNhxString( "n10_RAT1", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "n10_RAT1", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !e.getName().equals( "n10_RAT1" ) ) { return false; } @@ -4674,7 +4677,7 @@ public final class Test { } final PhylogenyNode n11 = PhylogenyNode .createInstanceFromNhxString( "n111111_ECOLI/jdj:0.4", - ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n11.getName().equals( "n111111_ECOLI/jdj" ) ) { return false; } @@ -4686,7 +4689,7 @@ public final class Test { } final PhylogenyNode n12 = PhylogenyNode .createInstanceFromNhxString( "n111111-ECOLI---/jdj:0.4", - ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n12.getName().equals( "n111111-ECOLI---/jdj" ) ) { return false; } @@ -4775,7 +4778,8 @@ public final class Test { return false; } final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345/1-2", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "blah_12345/1-2", + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n13.getName().equals( "blah_12345/1-2" ) ) { return false; } @@ -4783,7 +4787,8 @@ public final class Test { return false; } final PhylogenyNode n14 = PhylogenyNode - .createInstanceFromNhxString( "blah_12X45/1-2", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( "blah_12X45/1-2", + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n14.getName().equals( "blah_12X45/1-2" ) ) { return false; } @@ -4792,7 +4797,7 @@ public final class Test { } final PhylogenyNode n15 = PhylogenyNode .createInstanceFromNhxString( "something_wicked[123]", - ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n15.getName().equals( "something_wicked" ) ) { return false; } @@ -4804,7 +4809,7 @@ public final class Test { } final PhylogenyNode n16 = PhylogenyNode .createInstanceFromNhxString( "something_wicked2[9]", - ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n16.getName().equals( "something_wicked2" ) ) { return false; } @@ -4816,7 +4821,7 @@ public final class Test { } final PhylogenyNode n17 = PhylogenyNode .createInstanceFromNhxString( "something_wicked3[a]", - ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !n17.getName().equals( "something_wicked3" ) ) { return false; } @@ -4824,7 +4829,7 @@ public final class Test { return false; } final PhylogenyNode n18 = PhylogenyNode - .createInstanceFromNhxString( ":0.5[91]", ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .createInstanceFromNhxString( ":0.5[91]", PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); if ( !isEqual( n18.getDistanceToParent(), 0.5 ) ) { return false; } diff --git a/forester/java/src/org/forester/test/examples/Example1.java b/forester/java/src/org/forester/test/examples/Example1.java index 13b24d1..cd65138 100644 --- a/forester/java/src/org/forester/test/examples/Example1.java +++ b/forester/java/src/org/forester/test/examples/Example1.java @@ -31,8 +31,9 @@ import java.io.IOException; import org.forester.archaeopteryx.Archaeopteryx; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; -import org.forester.util.ForesterUtil; +import org.forester.phylogeny.PhylogenyMethods; public class Example1 { @@ -41,14 +42,14 @@ public class Example1 { final File treefile = new File( "/home/czmasek/tol_117_TEST.xml" ); PhylogenyParser parser = null; try { - parser = ForesterUtil.createParserDependingOnFileType( treefile, true ); + parser = ParserUtils.createParserDependingOnFileType( treefile, true ); } catch ( final IOException e ) { e.printStackTrace(); } Phylogeny[] phys = null; try { - phys = ForesterUtil.readPhylogenies( parser, treefile ); + phys = PhylogenyMethods.readPhylogenies( parser, treefile ); } catch ( final IOException e ) { e.printStackTrace(); diff --git a/forester/java/src/org/forester/test/examples/Example4.java b/forester/java/src/org/forester/test/examples/Example4.java index b363ab3..2c5cb17 100644 --- a/forester/java/src/org/forester/test/examples/Example4.java +++ b/forester/java/src/org/forester/test/examples/Example4.java @@ -30,8 +30,10 @@ import java.io.File; import java.io.IOException; import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; import org.forester.util.ForesterUtil; public class Example4 { @@ -41,14 +43,14 @@ public class Example4 { final File treefile = new File( "/home/czmasek/tol_117_TEST.xml" ); PhylogenyParser parser = null; try { - parser = ForesterUtil.createParserDependingOnFileType( treefile, true ); + parser = ParserUtils.createParserDependingOnFileType( treefile, true ); } catch ( final IOException e ) { e.printStackTrace(); } Phylogeny[] phys = null; try { - phys = ForesterUtil.readPhylogenies( parser, treefile ); + phys = PhylogenyMethods.readPhylogenies( parser, treefile ); } catch ( final IOException e ) { e.printStackTrace(); diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java index ebc1a6a..24bbaf2 100644 --- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java +++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java @@ -32,6 +32,7 @@ import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.forester.archaeopteryx.Util; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; @@ -90,63 +91,63 @@ public final class PhylogenyDecorator { } if ( new_values != null ) { if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) ); } if ( new_values.containsKey( TP_TAXONOMY_ID ) && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData() .getTaxonomy() .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ), new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) ); } else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy() .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) ); } if ( new_values.containsKey( TP_TAXONOMY_SN ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) ); } if ( new_values.containsKey( TP_TAXONOMY_CN ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) ); } if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) { - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) ); } if ( new_values.containsKey( TP_SEQ_ACCESSION ) && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); + Util.ensurePresenceOfSequence( node ); node.getNodeData() .getSequence() .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ), new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) ); } if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); + Util.ensurePresenceOfSequence( node ); final Annotation ann = new Annotation( "?" ); ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) ); node.getNodeData().getSequence().addAnnotation( ann ); } if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); + Util.ensurePresenceOfSequence( node ); final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) ); node.getNodeData().getSequence().addAnnotation( ann ); } if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); + Util.ensurePresenceOfSequence( node ); node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) ); } if ( new_values.containsKey( TP_SEQ_NAME ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); + Util.ensurePresenceOfSequence( node ); node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) ); } if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) { - ForesterUtil.ensurePresenceOfSequence( node ); + Util.ensurePresenceOfSequence( node ); node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) ); } if ( new_values.containsKey( TP_NODE_NAME ) ) { @@ -274,14 +275,14 @@ public final class PhylogenyDecorator { if ( PhylogenyDecorator.VERBOSE ) { System.out.println( name + ": " + new_value ); } - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setTaxonomyCode( new_value ); break; case TAXONOMY_SCIENTIFIC_NAME: if ( PhylogenyDecorator.VERBOSE ) { System.out.println( name + ": " + new_value ); } - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setScientificName( new_value ); break; case SEQUENCE_NAME: @@ -414,7 +415,7 @@ public final class PhylogenyDecorator { private static void extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) { final int i = new_value.lastIndexOf( "[" ); final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 ); - ForesterUtil.ensurePresenceOfTaxonomy( node ); + Util.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setScientificName( scientific_name ); } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 64f3f81..78b01ea 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -48,7 +48,6 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; -import java.util.Hashtable; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -57,28 +56,8 @@ import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; -import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.forester.io.parsers.PhylogenyParser; -import org.forester.io.parsers.nexus.NexusPhylogeniesParser; -import org.forester.io.parsers.nhx.NHXParser; -import org.forester.io.parsers.phyloxml.PhyloXmlParser; -import org.forester.io.parsers.phyloxml.PhyloXmlUtil; -import org.forester.io.parsers.tol.TolParser; -import org.forester.io.parsers.util.PhylogenyParserException; -import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; -import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Confidence; -import org.forester.phylogeny.data.Distribution; -import org.forester.phylogeny.data.Identifier; -import org.forester.phylogeny.data.Sequence; -import org.forester.phylogeny.data.Taxonomy; -import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; -import org.forester.phylogeny.factories.PhylogenyFactory; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; - public final class ForesterUtil { public final static String FILE_SEPARATOR = System.getProperty( "file.separator" ); @@ -298,203 +277,6 @@ public final class ForesterUtil { return file; } - final public static PhylogenyParser createParserDependingFileContents( final File file, - final boolean phyloxml_validate_against_xsd ) - throws FileNotFoundException, IOException { - PhylogenyParser parser = null; - final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase(); - if ( first_line.startsWith( "<" ) ) { - parser = new PhyloXmlParser(); - if ( phyloxml_validate_against_xsd ) { - final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); - final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); - if ( xsd_url != null ) { - ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); - } - else { - if ( ForesterConstants.RELEASE ) { - throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" - + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); - } - } - } - } - else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) - || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { - parser = new NexusPhylogeniesParser(); - } - else { - parser = new NHXParser(); - } - return parser; - } - - final public static PhylogenyParser createParserDependingOnFileType( final File file, - final boolean phyloxml_validate_against_xsd ) - throws FileNotFoundException, IOException { - PhylogenyParser parser = null; - parser = createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd ); - if ( parser == null ) { - parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd ); - } - return parser; - } - - /** - * Return null if it can not guess the parser to use based on name suffix. - * - * @param filename - * @return - */ - final public static PhylogenyParser createParserDependingOnSuffix( final String filename, - final boolean phyloxml_validate_against_xsd ) { - PhylogenyParser parser = null; - final String filename_lc = filename.toLowerCase(); - if ( filename_lc.endsWith( ".tol" ) || filename_lc.endsWith( ".tolxml" ) || filename_lc.endsWith( ".tol.zip" ) ) { - parser = new TolParser(); - } - else if ( filename_lc.endsWith( ".xml" ) || filename_lc.endsWith( ".px" ) || filename_lc.endsWith( "phyloxml" ) - || filename_lc.endsWith( ".zip" ) ) { - parser = new PhyloXmlParser(); - if ( phyloxml_validate_against_xsd ) { - final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); - final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); - if ( xsd_url != null ) { - ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); - } - else { - if ( ForesterConstants.RELEASE ) { - throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" - + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); - } - } - } - } - else if ( filename_lc.endsWith( ".nexus" ) || filename_lc.endsWith( ".nex" ) || filename_lc.endsWith( ".nx" ) ) { - parser = new NexusPhylogeniesParser(); - } - else if ( filename_lc.endsWith( ".nhx" ) || filename_lc.endsWith( ".nh" ) || filename_lc.endsWith( ".newick" ) ) { - parser = new NHXParser(); - } - return parser; - } - - final public static PhylogenyParser createParserDependingOnUrlContents( final URL url, - final boolean phyloxml_validate_against_xsd ) - throws FileNotFoundException, IOException { - final String lc_filename = url.getFile().toString().toLowerCase(); - PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd ); - if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) { - if ( parser instanceof PhyloXmlParser ) { - ( ( PhyloXmlParser ) parser ).setZippedInputstream( true ); - } - else if ( parser instanceof TolParser ) { - ( ( TolParser ) parser ).setZippedInputstream( true ); - } - } - if ( parser == null ) { - final String first_line = getFirstLine( url ).trim().toLowerCase(); - if ( first_line.startsWith( "<" ) ) { - parser = new PhyloXmlParser(); - if ( phyloxml_validate_against_xsd ) { - final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); - final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); - if ( xsd_url != null ) { - ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); - } - else { - throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" - + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); - } - } - } - else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) - || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { - parser = new NexusPhylogeniesParser(); - } - else { - parser = new NHXParser(); - } - } - return parser; - } - - final public static void ensurePresenceOfDate( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasDate() ) { - node.getNodeData().setDate( new org.forester.phylogeny.data.Date() ); - } - } - - final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasDistribution() ) { - node.getNodeData().setDistribution( new Distribution( "" ) ); - } - } - - public static void ensurePresenceOfSequence( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasSequence() ) { - node.getNodeData().setSequence( new Sequence() ); - } - } - - public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasTaxonomy() ) { - node.getNodeData().setTaxonomy( new Taxonomy() ); - } - } - - /** - * Extracts a code if and only if: - * one and only one _, - * shorter than 25, - * no |, - * no ., - * if / present it has to be after the _, - * if PFAM_STYLE_ONLY: / must be present, - * tax code can only contain uppercase letters and numbers, - * and must contain at least one uppercase letter. - * Return null if no code extractable. - * - * @param name - * @param limit_to_five - * @return - */ - public static String extractTaxonomyCodeFromNodeName( final String name, - final boolean limit_to_five, - final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction ) { - if ( ( name.indexOf( "_" ) > 0 ) - && ( name.length() < 25 ) - && ( name.lastIndexOf( "_" ) == name.indexOf( "_" ) ) - && ( name.indexOf( "|" ) < 0 ) - && ( name.indexOf( "." ) < 0 ) - && ( ( taxonomy_extraction != ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name - .indexOf( "/" ) >= 0 ) ) - && ( ( ( name.indexOf( "/" ) ) < 0 ) || ( name.indexOf( "/" ) > name.indexOf( "_" ) ) ) ) { - final String[] s = name.split( "[_/]" ); - if ( s.length > 1 ) { - String str = s[ 1 ]; - if ( limit_to_five ) { - if ( str.length() > 5 ) { - str = str.substring( 0, 5 ); - } - else if ( ( str.length() < 5 ) && ( str.startsWith( "RAT" ) || str.startsWith( "PIG" ) ) ) { - str = str.substring( 0, 3 ); - } - } - final Matcher letters_and_numbers = NHXParser.UC_LETTERS_NUMBERS_PATTERN.matcher( str ); - if ( !letters_and_numbers.matches() ) { - return null; - } - final Matcher numbers_only = NHXParser.NUMBERS_ONLY_PATTERN.matcher( str ); - if ( numbers_only.matches() ) { - return null; - } - return str; - } - } - return null; - } - public static void fatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name + "] > " + message ); @@ -603,27 +385,6 @@ public final class ForesterUtil { return ForesterUtil.LINE_SEPARATOR; } - /** - * Returns all custom data tag names of this Phylogeny as Hashtable. Tag - * names are keys, values are Boolean set to false. - */ - final public static Hashtable getPropertyRefs( final Phylogeny phylogeny ) { - final Hashtable ht = new Hashtable(); - if ( phylogeny.isEmpty() ) { - return ht; - } - for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode current_node = iter.next(); - if ( current_node.getNodeData().isHasProperties() ) { - final String[] tags = current_node.getNodeData().getProperties().getPropertyRefs(); - for( int i = 0; i < tags.length; ++i ) { - ht.put( tags[ i ], new Boolean( false ) ); - } - } - } - return ht; - } - final public static void increaseCountingMap( final Map counting_map, final String item_name ) { if ( !counting_map.containsKey( item_name ) ) { counting_map.put( item_name, 1 ); @@ -633,28 +394,6 @@ public final class ForesterUtil { } } - final static public boolean isAllNonEmptyInternalLabelsArePositiveNumbers( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isRoot() && !n.isExternal() ) { - if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double d = -1.0; - try { - d = Double.parseDouble( n.getName() ); - } - catch ( final Exception e ) { - d = -1.0; - } - if ( d < 0.0 ) { - return false; - } - } - } - } - return true; - } - final public static boolean isContainsParanthesesableNhCharacter( final String nh ) { return PARANTHESESABLE_NH_CHARS_PATTERN.matcher( nh ).find(); } @@ -695,42 +434,6 @@ public final class ForesterUtil { return n % 2 == 0; } - final static public boolean isHasAtLeastNodeWithEvent( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - if ( it.next().getNodeData().isHasEvent() ) { - return true; - } - } - return false; - } - - /** - * Returns true if at least one branch has a length larger than zero. - * - * - * @param phy - */ - final static public boolean isHasAtLeastOneBranchLengthLargerThanZero( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - if ( it.next().getDistanceToParent() > 0.0 ) { - return true; - } - } - return false; - } - - final static public boolean isHasAtLeastOneBranchWithSupportValues( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - if ( it.next().getBranchData().isHasConfidences() ) { - return true; - } - } - return false; - } - /** * This determines whether String[] a and String[] b have at least one * String in common (intersect). Returns false if at least one String[] is @@ -939,16 +642,6 @@ public final class ForesterUtil { return Integer.parseInt( str ); } - final public static void postOrderRelabelInternalNodes( final Phylogeny phylogeny, final int starting_number ) { - int i = starting_number; - for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) { - final PhylogenyNode node = it.next(); - if ( !node.isExternal() ) { - node.setName( String.valueOf( i++ ) ); - } - } - } - final public static void printArray( final Object[] a ) { for( int i = 0; i < a.length; ++i ) { System.out.println( "[" + i + "]=" + a[ i ] ); @@ -1005,15 +698,6 @@ public final class ForesterUtil { System.out.println( "[" + prg_name + "] > " + message ); } - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny[] trees = factory.create( file, parser ); - if ( ( trees == null ) || ( trees.length == 0 ) ) { - throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); - } - return trees; - } - final public static String removeSuffix( final String file_name ) { final int i = file_name.lastIndexOf( '.' ); if ( i > 1 ) { @@ -1135,135 +819,6 @@ public final class ForesterUtil { return null; } - final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { - double value = -1; - try { - value = Double.parseDouble( n.getName() ); - } - catch ( final NumberFormatException e ) { - throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " - + e.getLocalizedMessage() ); - } - if ( value >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); - n.setName( "" ); - } - } - } - } - - final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isRoot() && !n.isExternal() && !n.getBranchData().isHasConfidences() ) { - if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double d = -1.0; - try { - d = Double.parseDouble( n.getName() ); - } - catch ( final Exception e ) { - d = -1.0; - } - if ( d >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( d, "" ) ); - n.setName( "" ); - } - } - } - } - } - - final static public void transferNodeNameToField( final Phylogeny phy, final PhylogenyNodeField field ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - final String name = n.getName().trim(); - if ( !ForesterUtil.isEmpty( name ) ) { - switch ( field ) { - case TAXONOMY_CODE: - //temp hack - // if ( name.length() > 5 ) { - // n.setName( "" ); - // if ( !n.getNodeData().isHasTaxonomy() ) { - // n.getNodeData().setTaxonomy( new Taxonomy() ); - // } - // n.getNodeData().getTaxonomy().setScientificName( name ); - // break; - // } - // - n.setName( "" ); - PhylogenyMethods.setTaxonomyCode( n, name ); - break; - case TAXONOMY_SCIENTIFIC_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setScientificName( name ); - break; - case TAXONOMY_COMMON_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setCommonName( name ); - break; - case SEQUENCE_SYMBOL: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setSymbol( name ); - break; - case SEQUENCE_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setName( name ); - break; - case TAXONOMY_ID_UNIPROT_1: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( 0, i ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - case TAXONOMY_ID_UNIPROT_2: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( i + 1, name.length() ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - } - } - } - } - final public static void unexpectedFatalError( final String prg_name, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name @@ -1320,19 +875,4 @@ public final class ForesterUtil { } return sb.toString(); } - - public static enum PhylogenyNodeField { - CLADE_NAME, - TAXONOMY_CODE, - TAXONOMY_SCIENTIFIC_NAME, - TAXONOMY_COMMON_NAME, - SEQUENCE_SYMBOL, - SEQUENCE_NAME, - TAXONOMY_ID_UNIPROT_1, - TAXONOMY_ID_UNIPROT_2; - } - - public static enum TAXONOMY_EXTRACTION { - NO, YES, PFAM_STYLE_ONLY; - } }