work for rrm project (ComPhy 2012 Moscow)
[jalview.git] / forester / java / src / org / forester / application / phyloxml_converter.java
index 9822e1f..25497ec 100644 (file)
@@ -6,7 +6,7 @@
 // Copyright (C) 2008-2009 Christian M. Zmasek
 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
 // All rights reserved
-// 
+//
 // This library is free software; you can redistribute it and/or
 // modify it under the terms of the GNU Lesser General Public
 // License as published by the Free Software Foundation; either
@@ -16,7 +16,7 @@
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 // Lesser General Public License for more details.
-// 
+//
 // You should have received a copy of the GNU Lesser General Public
 // License along with this library; if not, write to the Free Software
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
@@ -34,44 +34,48 @@ import java.util.List;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
 import org.forester.io.parsers.nhx.NHXParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.PhylogenyWriter;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.util.CommandLineArguments;
 import org.forester.util.ForesterUtil;
-import org.forester.util.ForesterUtil.PhylogenyNodeField;
 
 public class phyloxml_converter {
 
-    final static private String  HELP_OPTION_1                    = "help";
-    final static private String  HELP_OPTION_2                    = "h";
-    final static private String  FIELD_OPTION                     = "f";
-    final static private String  FIELD_CLADE_NAME                 = "nn";
-    final static private String  FIELD_TAXONOMY_CODE              = "tc";
-    final static private String  FIELD_TAXONOMY_SCI_NAME          = "sn";
-    final static private String  FIELD_TAXONOMY_COMM_NAME         = "cn";
-    final static private String  FIELD_SEQUENCE_GENE_NAME         = "gn";
-    final static private String  FIELD_SEQUENCE_SYMBOL            = "sy";
-    final static private String  FIELD_DUMMY                      = "dummy";
-    final static private String  INTERNAL_NAMES_ARE_BOOT_SUPPPORT = "i";
-    final static private String  MIDPOINT_REROOT                  = "m";
-    final static private String  EXTRACT_TAXONOMY                 = "xt";
-    final static private String  EXTRACT_TAXONOMY_PF              = "xp";
-    final static private String  ORDER_SUBTREES                   = "o";
-    final static private String  NO_TREE_LEVEL_INDENDATION        = "ni";
-    final static private String  REPLACE_UNDER_SCORES             = "ru";
-    final static private String  PRG_NAME                         = "phyloxml_converter";
-    final static private String  PRG_VERSION                      = "1.21";
-    final static private String  PRG_DATE                         = "2010.10.02";
-    final static private String  E_MAIL                           = "czmasek@burnham.org";
-    final static private String  WWW                              = "www.phylosoft.org/forester/";
-    final static private boolean SPECIAL                          = false;
+    final static private String  HELP_OPTION_1                     = "help";
+    final static private String  HELP_OPTION_2                     = "h";
+    final static private String  FIELD_OPTION                      = "f";
+    final static private String  FIELD_CLADE_NAME                  = "nn";
+    final static private String  FIELD_TAXONOMY_CODE               = "tc";
+    final static private String  FIELD_TAXONOMY_SCI_NAME           = "sn";
+    final static private String  FIELD_TAXONOMY_COMM_NAME          = "cn";
+    final static private String  FIELD_SEQUENCE_GENE_NAME          = "gn";
+    final static private String  FIELD_SEQUENCE_SYMBOL             = "sy";
+    final static private String  FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 = "i1";
+    final static private String  FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 = "i2";
+    final static private String  FIELD_DUMMY                       = "dummy";
+    final static private String  INTERNAL_NAMES_ARE_BOOT_SUPPPORT  = "i";
+    final static private String  MIDPOINT_REROOT                   = "m";
+    final static private String  EXTRACT_TAXONOMY                  = "xt";
+    final static private String  EXTRACT_TAXONOMY_PF               = "xp";
+    final static private String  ORDER_SUBTREES                    = "o";
+    final static private String  NO_TREE_LEVEL_INDENDATION         = "ni";
+    final static private String  REPLACE_UNDER_SCORES              = "ru";
+    final static private String  PRG_NAME                          = "phyloxml_converter";
+    final static private String  PRG_VERSION                       = "1.30";
+    final static private String  PRG_DATE                          = "2011.03.01";
+    final static private String  E_MAIL                            = "phylosoft@gmail.com";
+    final static private String  WWW                               = "www.phylosoft.org/forester/";
+    final static private boolean SPECIAL                           = false;
 
-    public static void main( final String args[] ) {
+    public static void main( final String args[] ) throws PhyloXmlDataFormatException {
         ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
         CommandLineArguments cla = null;
         try {
@@ -123,24 +127,30 @@ public class phyloxml_converter {
             System.exit( -1 );
         }
         final String field_option_value = cla.getOptionValue( FIELD_OPTION );
-        PhylogenyNodeField field = null;
+        PhylogenyMethods.PhylogenyNodeField field = null;
         if ( field_option_value.equals( FIELD_CLADE_NAME ) ) {
-            field = PhylogenyNodeField.CLADE_NAME;
+            field = PhylogenyMethods.PhylogenyNodeField.CLADE_NAME;
         }
         else if ( field_option_value.equals( FIELD_TAXONOMY_CODE ) ) {
-            field = PhylogenyNodeField.TAXONOMY_CODE;
+            field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE;
         }
         else if ( field_option_value.equals( FIELD_TAXONOMY_SCI_NAME ) ) {
-            field = PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME;
+            field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME;
         }
         else if ( field_option_value.equals( FIELD_TAXONOMY_COMM_NAME ) ) {
-            field = PhylogenyNodeField.TAXONOMY_COMMON_NAME;
+            field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME;
         }
         else if ( field_option_value.equals( FIELD_SEQUENCE_GENE_NAME ) ) {
-            field = PhylogenyNodeField.SEQUENCE_NAME;
+            field = PhylogenyMethods.PhylogenyNodeField.SEQUENCE_NAME;
         }
         else if ( field_option_value.equals( FIELD_SEQUENCE_SYMBOL ) ) {
-            field = PhylogenyNodeField.SEQUENCE_SYMBOL;
+            field = PhylogenyMethods.PhylogenyNodeField.SEQUENCE_SYMBOL;
+        }
+        else if ( field_option_value.equals( FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1 ) ) {
+            field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID_UNIPROT_1;
+        }
+        else if ( field_option_value.equals( FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2 ) ) {
+            field = PhylogenyMethods.PhylogenyNodeField.TAXONOMY_ID_UNIPROT_2;
         }
         else if ( field_option_value.equals( FIELD_DUMMY ) ) {
         }
@@ -187,23 +197,23 @@ public class phyloxml_converter {
         Phylogeny[] phys = null;
         try {
             final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-            final PhylogenyParser parser = ForesterUtil.createParserDependingOnFileType( infile, true );
+            final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( infile, true );
             if ( parser instanceof NHXParser ) {
-                if ( ( field != PhylogenyNodeField.TAXONOMY_CODE )
-                        && ( field != PhylogenyNodeField.TAXONOMY_COMMON_NAME )
-                        && ( field != PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) {
+                if ( ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_CODE )
+                        && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_COMMON_NAME )
+                        && ( field != PhylogenyMethods.PhylogenyNodeField.TAXONOMY_SCIENTIFIC_NAME ) ) {
                     if ( extr_taxonomy_pf_only ) {
                         ( ( NHXParser ) parser )
-                                .setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
+                                .setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY );
                         replace_underscores = false;
                     }
                     else if ( extr_taxonomy ) {
-                        ( ( NHXParser ) parser ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.YES );
+                        ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES );
                         replace_underscores = false;
                     }
                 }
                 else {
-                    ( ( NHXParser ) parser ).setTaxonomyExtraction( ForesterUtil.TAXONOMY_EXTRACTION.NO );
+                    ( ( NHXParser ) parser ).setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.NO );
                 }
                 ( ( NHXParser ) parser ).setReplaceUnderscores( replace_underscores );
                 ( ( NHXParser ) parser ).setIgnoreQuotes( false );
@@ -224,12 +234,12 @@ public class phyloxml_converter {
         }
         if ( int_values_are_boots ) {
             for( final Phylogeny phy : phys ) {
-                ForesterUtil.transferInternalNamesToBootstrapSupport( phy );
+                PhylogenyMethods.transferInternalNamesToBootstrapSupport( phy );
             }
         }
         if ( field != null ) {
             for( final Phylogeny phy : phys ) {
-                ForesterUtil.transferNodeNameToField( phy, field );
+                PhylogenyMethods.transferNodeNameToField( phy, field, false );
             }
         }
         if ( midpoint_reroot ) {
@@ -245,7 +255,10 @@ public class phyloxml_converter {
         }
         if ( order_subtrees ) {
             for( final Phylogeny phy : phys ) {
-                phy.orderAppearance( true );
+                PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY );
+                phy.externalNodesHaveChanged();
+                phy.clearHashIdToNodeMap();
+                phy.recalculateNumberOfExternalDescendants( true );
             }
         }
         try {
@@ -353,6 +366,14 @@ public class phyloxml_converter {
         System.out.println( "   " + FIELD_TAXONOMY_COMM_NAME + ": transfer name to taxonomy common name" );
         System.out.println( "   " + FIELD_SEQUENCE_GENE_NAME + ": transfer name to sequence name" );
         System.out.println( "   " + FIELD_SEQUENCE_SYMBOL + ": transfer name to sequence symbol" );
+        System.out
+                .println( "   "
+                        + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_1
+                        + ": transfer/split name to taxonomy uniprot identifier\n       (split at underscore if \"id_name\" pattern, e.g. \"817_SusD\")" );
+        System.out
+                .println( "   "
+                        + FIELD_UNIPROT_TAXONOMY_ID_SPLIT_2
+                        + ": transfer/split name to taxonomy uniprot identifier\n       (split at underscore if \"name_id\" pattern, e.g. \"SusD_817\")" );
         System.out.println();
         System.out.println( " options: " );
         System.out.println( " -" + INTERNAL_NAMES_ARE_BOOT_SUPPPORT