X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fphyloxml_converter.java;h=164bfeef1fc2d2f81a4ef8f1159ecebde56e43eb;hb=504b2b133e9814ac9ee966dc04a1408c455c6a2f;hp=d2c0a5cd283aed3cc5cf33dfcf83158332a250c0;hpb=eee996a6476a1e3d84c07f8f690dcde3ff4b2ef5;p=jalview.git diff --git a/forester/java/src/org/forester/application/phyloxml_converter.java b/forester/java/src/org/forester/application/phyloxml_converter.java index d2c0a5c..164bfee 100644 --- a/forester/java/src/org/forester/application/phyloxml_converter.java +++ b/forester/java/src/org/forester/application/phyloxml_converter.java @@ -22,7 +22,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; @@ -34,16 +34,18 @@ import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; +import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; -import org.forester.util.ForesterUtil.PhylogenyNodeField; public class phyloxml_converter { @@ -66,14 +68,15 @@ public class phyloxml_converter { final static private String ORDER_SUBTREES = "o"; final static private String NO_TREE_LEVEL_INDENDATION = "ni"; final static private String REPLACE_UNDER_SCORES = "ru"; + final static private String IGNORE_QUOTES = "iqs"; final static private String PRG_NAME = "phyloxml_converter"; - final static private String PRG_VERSION = "1.30"; - final static private String PRG_DATE = "2011.03.01"; + final static private String PRG_VERSION = "1.301"; + final static private String PRG_DATE = "2012.08.31"; final static private String E_MAIL = "phylosoft@gmail.com"; final static private String WWW = "www.phylosoft.org/forester/"; final static private boolean SPECIAL = false; - public static void main( final String args[] ) { + public static void main( final String args[] ) throws PhyloXmlDataFormatException { ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); CommandLineArguments cla = null; try { @@ -102,6 +105,7 @@ public class phyloxml_converter { allowed_options.add( REPLACE_UNDER_SCORES ); allowed_options.add( EXTRACT_TAXONOMY ); allowed_options.add( EXTRACT_TAXONOMY_PF ); + allowed_options.add( IGNORE_QUOTES ); if ( cla.getNumberOfNames() != 2 ) { System.out.println(); System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); @@ -125,30 +129,30 @@ public class phyloxml_converter { System.exit( -1 ); } final String field_option_value = cla.getOptionValue( FIELD_OPTION ); - PhylogenyNodeField field = null; + PhylogenyMethods.PhylogenyNodeField field = null; if ( field_option_value.equals( FIELD_CLADE_NAME ) ) { - field = PhylogenyNodeField.CLADE_NAME; + field = PhylogenyMethods.PhylogenyNodeField.CLADE_NAME; } else if ( field_option_value.equals( FIELD_TAXONOMY_CODE ) ) { - field = PhylogenyNodeField.TAXONOMY_CODE; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE; } else if ( field_option_value.equals( FIELD_TAXONOMY_SCI_NAME ) ) { - field = PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME; } else if ( field_option_value.equals( FIELD_TAXONOMY_COMM_NAME ) ) { - field = PhylogenyNodeField.TAXONOMY_COMMON_NAME; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME; } else if ( field_option_value.equals( FIELD_SEQUENCE_GENE_NAME ) ) { - field = PhylogenyNodeField.SEQUENCE_NAME; + field = PhylogenyMethods.PhylogenyNodeField.SEQUENCE_NAME; } else if ( field_option_value.equals( FIELD_SEQUENCE_SYMBOL ) ) { - field = PhylogenyNodeField.SEQUENCE_SYMBOL; + field = PhylogenyMethods.PhylogenyNodeField.SEQUENCE_SYMBOL; } else if ( field_option_value.equals( FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 ) ) { - field = PhylogenyNodeField.TAXONOMY_ID_UNIPROT_1; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID_UNIPROT_1; } else if ( field_option_value.equals( FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 ) ) { - field = PhylogenyNodeField.TAXONOMY_ID_UNIPROT_2; + field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID_UNIPROT_2; } else if ( field_option_value.equals( FIELD_DUMMY ) ) { } @@ -156,6 +160,10 @@ public class phyloxml_converter { ForesterUtil.fatalError( PRG_NAME, "unknown value for -\"" + FIELD_OPTION + "\" option: \"" + field_option_value + "\"" ); } + boolean ignore_quotes = false; + if ( cla.isOptionSet( IGNORE_QUOTES ) ) { + ignore_quotes = true; + } boolean int_values_are_boots = false; if ( cla.isOptionSet( INTERNAL_NAMES_ARE_BOOT_SUPPPORT ) ) { int_values_are_boots = true; @@ -195,26 +203,27 @@ public class phyloxml_converter { Phylogeny[] phys = null; try { final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final PhylogenyParser parser = ForesterUtil.createParserDependingOnFileType( infile, true ); + final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( infile, true ); if ( parser instanceof NHXParser ) { - if ( ( field != PhylogenyNodeField.TAXONOMY_CODE ) - && ( field != PhylogenyNodeField.TAXONOMY_COMMON_NAME ) - && ( field != PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) { + if ( ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE ) + && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME ) + && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) { if ( extr_taxonomy_pf_only ) { ( ( NHXParser ) parser ) - .setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); replace_underscores = false; } else if ( extr_taxonomy ) { - ( ( NHXParser ) parser ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.YES ); + ( ( NHXParser ) parser ) + .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); replace_underscores = false; } } else { - ( ( NHXParser ) parser ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO ); + ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); } ( ( NHXParser ) parser ).setReplaceUnderscores( replace_underscores ); - ( ( NHXParser ) parser ).setIgnoreQuotes( false ); + ( ( NHXParser ) parser ).setIgnoreQuotes( ignore_quotes ); } else if ( parser instanceof NexusPhylogeniesParser ) { ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( replace_underscores ); @@ -232,12 +241,12 @@ public class phyloxml_converter { } if ( int_values_are_boots ) { for( final Phylogeny phy : phys ) { - ForesterUtil.transferInternalNamesToBootstrapSupport( phy ); + PhylogenyMethods.transferInternalNamesToBootstrapSupport( phy ); } } if ( field != null ) { for( final Phylogeny phy : phys ) { - ForesterUtil.transferNodeNameToField( phy, field ); + PhylogenyMethods.transferNodeNameToField( phy, field, false ); } } if ( midpoint_reroot ) { @@ -253,7 +262,10 @@ public class phyloxml_converter { } if ( order_subtrees ) { for( final Phylogeny phy : phys ) { - phy.orderAppearance( true ); + PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY ); + phy.externalNodesHaveChanged(); + phy.clearHashIdToNodeMap(); + phy.recalculateNumberOfExternalDescendants( true ); } } try { @@ -372,21 +384,22 @@ public class phyloxml_converter { System.out.println(); System.out.println( " options: " ); System.out.println( " -" + INTERNAL_NAMES_ARE_BOOT_SUPPPORT - + " : internal names in NH or NHX tree are bootstrap support values" ); - System.out.println( " -" + REPLACE_UNDER_SCORES + ": replace all underscores with spaces" ); - System.out.println( " -" + MIDPOINT_REROOT + " : midpoint reroot" ); - System.out.println( " -" + ORDER_SUBTREES + " : order subtrees" ); + + " : internal names in NH or NHX tree are bootstrap support values" ); + System.out.println( " -" + REPLACE_UNDER_SCORES + " : replace all underscores with spaces" ); + System.out.println( " -" + MIDPOINT_REROOT + " : midpoint reroot" ); + System.out.println( " -" + ORDER_SUBTREES + " : order subtrees" ); System.out .println( " -" + EXTRACT_TAXONOMY - + ": extract taxonomy to taxonomy code from \"seqname_TAXON\"-style names (cannot be used with the following field options: " + + " : extract taxonomy to taxonomy code from \"seqname_TAXON\"-style names (cannot be used with the following field options: " + FIELD_TAXONOMY_CODE + ", " + FIELD_TAXONOMY_COMM_NAME + ", " + FIELD_TAXONOMY_SCI_NAME + ")" ); System.out .println( " -" + EXTRACT_TAXONOMY_PF - + ": extract taxonomy to taxonomy code from Pfam (\"seqname_TAXON/x-y\") style names only (cannot be used with the following field options: " + + " : extract taxonomy to taxonomy code from Pfam (\"seqname_TAXON/x-y\") style names only (cannot be used with the following field options: " + FIELD_TAXONOMY_CODE + ", " + FIELD_TAXONOMY_COMM_NAME + ", " + FIELD_TAXONOMY_SCI_NAME + ")" ); - System.out.println( " -" + NO_TREE_LEVEL_INDENDATION + ": no tree level indendation in phyloXML output" ); + System.out.println( " -" + NO_TREE_LEVEL_INDENDATION + " : no tree level indendation in phyloXML output" ); + System.out.println( " -" + IGNORE_QUOTES + ": ignore quotes and whitespace (e.g. \"a b\" becomes ab)" ); System.out.println(); } }