X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fphyloxml_converter.java;h=d258bfc1ecc0a59d9dc5e13560a2e97d6d80cb15;hb=327fc7c7a9a46cbe8983b0ca2a54cc6f45cd1695;hp=6d4520732bbec2cfcbf5e1a104af8bfa7b823e4e;hpb=7f4318a3ef37864b5453e3cd56270b8e91e76b9f;p=jalview.git diff --git a/forester/java/src/org/forester/application/phyloxml_converter.java b/forester/java/src/org/forester/application/phyloxml_converter.java index 6d45207..d258bfc 100644 --- a/forester/java/src/org/forester/application/phyloxml_converter.java +++ b/forester/java/src/org/forester/application/phyloxml_converter.java @@ -22,7 +22,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; @@ -34,6 +34,7 @@ import java.util.List; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.nexus.NexusPhylogeniesParser; import org.forester.io.parsers.nhx.NHXParser; +import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; @@ -67,14 +68,15 @@ public class phyloxml_converter { final static private String ORDER_SUBTREES = "o"; final static private String NO_TREE_LEVEL_INDENDATION = "ni"; final static private String REPLACE_UNDER_SCORES = "ru"; + final static private String IGNORE_QUOTES = "iqs"; final static private String PRG_NAME = "phyloxml_converter"; - final static private String PRG_VERSION = "1.30"; - final static private String PRG_DATE = "2011.03.01"; - final static private String E_MAIL = "phylosoft@gmail.com"; - final static private String WWW = "www.phylosoft.org/forester/"; + final static private String PRG_VERSION = "1.302"; + final static private String PRG_DATE = "140516"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "sites.google.com/site/cmzmasek/home/software/forester"; final static private boolean SPECIAL = false; - public static void main( final String args[] ) { + public static void main( final String args[] ) throws PhyloXmlDataFormatException { ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW ); CommandLineArguments cla = null; try { @@ -103,6 +105,7 @@ public class phyloxml_converter { allowed_options.add( REPLACE_UNDER_SCORES ); allowed_options.add( EXTRACT_TAXONOMY ); allowed_options.add( EXTRACT_TAXONOMY_PF ); + allowed_options.add( IGNORE_QUOTES ); if ( cla.getNumberOfNames() != 2 ) { System.out.println(); System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" ); @@ -157,6 +160,10 @@ public class phyloxml_converter { ForesterUtil.fatalError( PRG_NAME, "unknown value for -\"" + FIELD_OPTION + "\" option: \"" + field_option_value + "\"" ); } + boolean ignore_quotes = false; + if ( cla.isOptionSet( IGNORE_QUOTES ) ) { + ignore_quotes = true; + } boolean int_values_are_boots = false; if ( cla.isOptionSet( INTERNAL_NAMES_ARE_BOOT_SUPPPORT ) ) { int_values_are_boots = true; @@ -203,19 +210,20 @@ public class phyloxml_converter { && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) { if ( extr_taxonomy_pf_only ) { ( ( NHXParser ) parser ) - .setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ); + .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); replace_underscores = false; } else if ( extr_taxonomy ) { - ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES ); + ( ( NHXParser ) parser ) + .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); replace_underscores = false; } } else { - ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.NO ); + ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO ); } ( ( NHXParser ) parser ).setReplaceUnderscores( replace_underscores ); - ( ( NHXParser ) parser ).setIgnoreQuotes( false ); + ( ( NHXParser ) parser ).setIgnoreQuotes( ignore_quotes ); } else if ( parser instanceof NexusPhylogeniesParser ) { ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( replace_underscores ); @@ -238,7 +246,7 @@ public class phyloxml_converter { } if ( field != null ) { for( final Phylogeny phy : phys ) { - PhylogenyMethods.transferNodeNameToField( phy, field ); + PhylogenyMethods.transferNodeNameToField( phy, field, false ); } } if ( midpoint_reroot ) { @@ -256,7 +264,7 @@ public class phyloxml_converter { for( final Phylogeny phy : phys ) { PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY ); phy.externalNodesHaveChanged(); - phy.hashIDs(); + phy.clearHashIdToNodeMap(); phy.recalculateNumberOfExternalDescendants( true ); } } @@ -359,38 +367,38 @@ public class phyloxml_converter { System.out.println(); System.out.println( " field options: " ); System.out.println(); - System.out.println( " " + FIELD_CLADE_NAME + ": transfer name to node/clade name" ); - System.out.println( " " + FIELD_TAXONOMY_CODE + ": transfer name to taxonomy code" ); - System.out.println( " " + FIELD_TAXONOMY_SCI_NAME + ": transfer name to taxonomy scientific name" ); - System.out.println( " " + FIELD_TAXONOMY_COMM_NAME + ": transfer name to taxonomy common name" ); - System.out.println( " " + FIELD_SEQUENCE_GENE_NAME + ": transfer name to sequence name" ); - System.out.println( " " + FIELD_SEQUENCE_SYMBOL + ": transfer name to sequence symbol" ); - System.out - .println( " " - + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 - + ": transfer/split name to taxonomy uniprot identifier\n (split at underscore if \"id_name\" pattern, e.g. \"817_SusD\")" ); - System.out - .println( " " - + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 - + ": transfer/split name to taxonomy uniprot identifier\n (split at underscore if \"name_id\" pattern, e.g. \"SusD_817\")" ); + System.out.println( " " + FIELD_CLADE_NAME + ": transfer name to node/clade name" ); + System.out.println( " " + FIELD_TAXONOMY_CODE + ": transfer name to taxonomy code" ); + System.out.println( " " + FIELD_TAXONOMY_SCI_NAME + ": transfer name to taxonomy scientific name" ); + System.out.println( " " + FIELD_TAXONOMY_COMM_NAME + ": transfer name to taxonomy common name" ); + System.out.println( " " + FIELD_SEQUENCE_GENE_NAME + ": transfer name to sequence name" ); + System.out.println( " " + FIELD_SEQUENCE_SYMBOL + ": transfer name to sequence symbol" ); + System.out.println( " " + FIELD_DUMMY + ": to convert NHX formatted trees to phyloXML" ); + System.out.println( " " + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 + + ": transfer/split name to taxonomy uniprot identifier" ); + System.out.println( " (split at underscore if \"id_name\" pattern, e.g. \"817_SusD\")" ); + System.out.println( " " + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 + + ": transfer/split name to taxonomy uniprot identifier" ); + System.out.println( " (split at underscore if \"name_id\" pattern, e.g. \"SusD_817\")" ); System.out.println(); System.out.println( " options: " ); System.out.println( " -" + INTERNAL_NAMES_ARE_BOOT_SUPPPORT - + " : internal names in NH or NHX tree are bootstrap support values" ); - System.out.println( " -" + REPLACE_UNDER_SCORES + ": replace all underscores with spaces" ); - System.out.println( " -" + MIDPOINT_REROOT + " : midpoint reroot" ); - System.out.println( " -" + ORDER_SUBTREES + " : order subtrees" ); + + " : internal names in NH or NHX tree are bootstrap support values" ); + System.out.println( " -" + REPLACE_UNDER_SCORES + " : replace all underscores with spaces" ); + System.out.println( " -" + MIDPOINT_REROOT + " : midpoint reroot" ); + System.out.println( " -" + ORDER_SUBTREES + " : order subtrees" ); System.out .println( " -" + EXTRACT_TAXONOMY - + ": extract taxonomy to taxonomy code from \"seqname_TAXON\"-style names (cannot be used with the following field options: " + + " : extract taxonomy to taxonomy code from \"seqname_TAXON\"-style names (cannot be used with the following field options: " + FIELD_TAXONOMY_CODE + ", " + FIELD_TAXONOMY_COMM_NAME + ", " + FIELD_TAXONOMY_SCI_NAME + ")" ); System.out .println( " -" + EXTRACT_TAXONOMY_PF - + ": extract taxonomy to taxonomy code from Pfam (\"seqname_TAXON/x-y\") style names only (cannot be used with the following field options: " + + " : extract taxonomy to taxonomy code from Pfam (\"seqname_TAXON/x-y\") style names only (cannot be used with the following field options: " + FIELD_TAXONOMY_CODE + ", " + FIELD_TAXONOMY_COMM_NAME + ", " + FIELD_TAXONOMY_SCI_NAME + ")" ); - System.out.println( " -" + NO_TREE_LEVEL_INDENDATION + ": no tree level indendation in phyloXML output" ); + System.out.println( " -" + NO_TREE_LEVEL_INDENDATION + " : no tree level indendation in phyloXML output" ); + System.out.println( " -" + IGNORE_QUOTES + ": ignore quotes and whitespace (e.g. \"a b\" becomes ab)" ); System.out.println(); } }