in progress (special coloring is still true)
[jalview.git] / forester / java / src / org / forester / application / phyloxml_converter.java
index bee52a0..d258bfc 100644 (file)
@@ -22,7 +22,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.application;
 
@@ -34,6 +34,7 @@ import java.util.List;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
 import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.PhylogenyWriter;
 import org.forester.phylogeny.Phylogeny;
@@ -67,14 +68,15 @@ public class phyloxml_converter {
     final static private String  ORDER_SUBTREES                    = "o";
     final static private String  NO_TREE_LEVEL_INDENDATION         = "ni";
     final static private String  REPLACE_UNDER_SCORES              = "ru";
+    final static private String  IGNORE_QUOTES                     = "iqs";
     final static private String  PRG_NAME                          = "phyloxml_converter";
-    final static private String  PRG_VERSION                       = "1.30";
-    final static private String  PRG_DATE                          = "2011.03.01";
-    final static private String  E_MAIL                            = "phylosoft@gmail.com";
-    final static private String  WWW                               = "www.phylosoft.org/forester/";
+    final static private String  PRG_VERSION                       = "1.302";
+    final static private String  PRG_DATE                          = "140516";
+    final static private String  E_MAIL                            = "phyloxml@gmail.com";
+    final static private String  WWW                               = "sites.google.com/site/cmzmasek/home/software/forester";
     final static private boolean SPECIAL                           = false;
 
-    public static void main( final String args[] ) {
+    public static void main( final String args[] ) throws PhyloXmlDataFormatException {
         ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
         CommandLineArguments cla = null;
         try {
@@ -103,6 +105,7 @@ public class phyloxml_converter {
         allowed_options.add( REPLACE_UNDER_SCORES );
         allowed_options.add( EXTRACT_TAXONOMY );
         allowed_options.add( EXTRACT_TAXONOMY_PF );
+        allowed_options.add( IGNORE_QUOTES );
         if ( cla.getNumberOfNames() != 2 ) {
             System.out.println();
             System.out.println( "[" + PRG_NAME + "] incorrect number of arguments" );
@@ -157,6 +160,10 @@ public class phyloxml_converter {
             ForesterUtil.fatalError( PRG_NAME, "unknown value for -\"" + FIELD_OPTION + "\" option: \""
                     + field_option_value + "\"" );
         }
+        boolean ignore_quotes = false;
+        if ( cla.isOptionSet( IGNORE_QUOTES ) ) {
+            ignore_quotes = true;
+        }
         boolean int_values_are_boots = false;
         if ( cla.isOptionSet( INTERNAL_NAMES_ARE_BOOT_SUPPPORT ) ) {
             int_values_are_boots = true;
@@ -203,19 +210,20 @@ public class phyloxml_converter {
                         && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) {
                     if ( extr_taxonomy_pf_only ) {
                         ( ( NHXParser ) parser )
-                                .setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                                .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
                         replace_underscores = false;
                     }
                     else if ( extr_taxonomy ) {
-                        ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES );
+                        ( ( NHXParser ) parser )
+                                .setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
                         replace_underscores = false;
                     }
                 }
                 else {
-                    ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.NO );
+                    ( ( NHXParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
                 }
                 ( ( NHXParser ) parser ).setReplaceUnderscores( replace_underscores );
-                ( ( NHXParser ) parser ).setIgnoreQuotes( false );
+                ( ( NHXParser ) parser ).setIgnoreQuotes( ignore_quotes );
             }
             else if ( parser instanceof NexusPhylogeniesParser ) {
                 ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( replace_underscores );
@@ -238,7 +246,7 @@ public class phyloxml_converter {
         }
         if ( field != null ) {
             for( final Phylogeny phy : phys ) {
-                PhylogenyMethods.transferNodeNameToField( phy, field );
+                PhylogenyMethods.transferNodeNameToField( phy, field, false );
             }
         }
         if ( midpoint_reroot ) {
@@ -255,6 +263,9 @@ public class phyloxml_converter {
         if ( order_subtrees ) {
             for( final Phylogeny phy : phys ) {
                 PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY );
+                phy.externalNodesHaveChanged();
+                phy.clearHashIdToNodeMap();
+                phy.recalculateNumberOfExternalDescendants( true );
             }
         }
         try {
@@ -356,38 +367,38 @@ public class phyloxml_converter {
         System.out.println();
         System.out.println( " field options: " );
         System.out.println();
-        System.out.println( "   " + FIELD_CLADE_NAME + ": transfer name to node/clade name" );
-        System.out.println( "   " + FIELD_TAXONOMY_CODE + ": transfer name to taxonomy code" );
-        System.out.println( "   " + FIELD_TAXONOMY_SCI_NAME + ": transfer name to taxonomy scientific name" );
-        System.out.println( "   " + FIELD_TAXONOMY_COMM_NAME + ": transfer name to taxonomy common name" );
-        System.out.println( "   " + FIELD_SEQUENCE_GENE_NAME + ": transfer name to sequence name" );
-        System.out.println( "   " + FIELD_SEQUENCE_SYMBOL + ": transfer name to sequence symbol" );
-        System.out
-                .println( "   "
-                        + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1
-                        + ": transfer/split name to taxonomy uniprot identifier\n       (split at underscore if \"id_name\" pattern, e.g. \"817_SusD\")" );
-        System.out
-                .println( "   "
-                        + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2
-                        + ": transfer/split name to taxonomy uniprot identifier\n       (split at underscore if \"name_id\" pattern, e.g. \"SusD_817\")" );
+        System.out.println( "   " + FIELD_CLADE_NAME + ":    transfer name to node/clade name" );
+        System.out.println( "   " + FIELD_TAXONOMY_CODE + ":    transfer name to taxonomy code" );
+        System.out.println( "   " + FIELD_TAXONOMY_SCI_NAME + ":    transfer name to taxonomy scientific name" );
+        System.out.println( "   " + FIELD_TAXONOMY_COMM_NAME + ":    transfer name to taxonomy common name" );
+        System.out.println( "   " + FIELD_SEQUENCE_GENE_NAME + ":    transfer name to sequence name" );
+        System.out.println( "   " + FIELD_SEQUENCE_SYMBOL + ":    transfer name to sequence symbol" );
+        System.out.println( "   " + FIELD_DUMMY + ": to convert NHX formatted trees to phyloXML" );
+        System.out.println( "   " + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1
+                + ":    transfer/split name to taxonomy uniprot identifier" );
+        System.out.println( "          (split at underscore if \"id_name\" pattern, e.g. \"817_SusD\")" );
+        System.out.println( "   " + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2
+                + ":    transfer/split name to taxonomy uniprot identifier" );
+        System.out.println( "          (split at underscore if \"name_id\" pattern, e.g. \"SusD_817\")" );
         System.out.println();
         System.out.println( " options: " );
         System.out.println( " -" + INTERNAL_NAMES_ARE_BOOT_SUPPPORT
-                + " : internal names in NH or NHX tree are bootstrap support values" );
-        System.out.println( " -" + REPLACE_UNDER_SCORES + ": replace all underscores with spaces" );
-        System.out.println( " -" + MIDPOINT_REROOT + " : midpoint reroot" );
-        System.out.println( " -" + ORDER_SUBTREES + " : order subtrees" );
+                + "  : internal names in NH or NHX tree are bootstrap support values" );
+        System.out.println( " -" + REPLACE_UNDER_SCORES + " : replace all underscores with spaces" );
+        System.out.println( " -" + MIDPOINT_REROOT + "  : midpoint reroot" );
+        System.out.println( " -" + ORDER_SUBTREES + "  : order subtrees" );
         System.out
                 .println( " -"
                         + EXTRACT_TAXONOMY
-                        + ": extract taxonomy to taxonomy code from \"seqname_TAXON\"-style names (cannot be used with the following field options: "
+                        + " : extract taxonomy to taxonomy code from \"seqname_TAXON\"-style names (cannot be used with the following field options: "
                         + FIELD_TAXONOMY_CODE + ", " + FIELD_TAXONOMY_COMM_NAME + ", " + FIELD_TAXONOMY_SCI_NAME + ")" );
         System.out
                 .println( " -"
                         + EXTRACT_TAXONOMY_PF
-                        + ": extract taxonomy to taxonomy code from Pfam (\"seqname_TAXON/x-y\") style names only (cannot be used with the following field options: "
+                        + " : extract taxonomy to taxonomy code from Pfam (\"seqname_TAXON/x-y\") style names only (cannot be used with the following field options: "
                         + FIELD_TAXONOMY_CODE + ", " + FIELD_TAXONOMY_COMM_NAME + ", " + FIELD_TAXONOMY_SCI_NAME + ")" );
-        System.out.println( " -" + NO_TREE_LEVEL_INDENDATION + ": no tree level indendation in phyloXML output" );
+        System.out.println( " -" + NO_TREE_LEVEL_INDENDATION + " : no tree level indendation in phyloXML output" );
+        System.out.println( " -" + IGNORE_QUOTES + ": ignore quotes and whitespace (e.g. \"a b\" becomes ab)" );
         System.out.println();
     }
 }