in progress
[jalview.git] / forester / java / src / org / forester / io / writers / PhylogenyWriter.java
index e4b7dc7..911ebb5 100644 (file)
@@ -27,7 +27,6 @@ package org.forester.io.writers;
 
 import java.io.BufferedWriter;
 import java.io.File;
-import java.io.FileWriter;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.io.StringWriter;
@@ -54,17 +53,17 @@ public final class PhylogenyWriter {
     public final static String          PHYLO_XML_INTENDATION_BASE      = "  ";
     public final static String          PHYLO_XML_VERSION_ENCODING_LINE = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
     public final static String          PHYLO_XML_NAMESPACE_LINE        = "<phyloxml xmlns:xsi=\""
-                                                                                + ForesterConstants.XML_SCHEMA_INSTANCE
-                                                                                + "\" xsi:schemaLocation=\""
-                                                                                + ForesterConstants.PHYLO_XML_LOCATION
-                                                                                + " "
-                                                                                + ForesterConstants.PHYLO_XML_LOCATION
-                                                                                + "/"
-                                                                                + ForesterConstants.PHYLO_XML_VERSION
-                                                                                + "/" + ForesterConstants.PHYLO_XML_XSD
-                                                                                + "\" " + "xmlns=\""
-                                                                                + ForesterConstants.PHYLO_XML_LOCATION
-                                                                                + "\">";
+            + ForesterConstants.XML_SCHEMA_INSTANCE
+            + "\" xsi:schemaLocation=\""
+            + ForesterConstants.PHYLO_XML_LOCATION
+            + " "
+            + ForesterConstants.PHYLO_XML_LOCATION
+            + "/"
+            + ForesterConstants.PHYLO_XML_VERSION
+            + "/" + ForesterConstants.PHYLO_XML_XSD
+            + "\" " + "xmlns=\""
+            + ForesterConstants.PHYLO_XML_LOCATION
+            + "\">";
     public final static String          PHYLO_XML_END                   = "</phyloxml>";
     private boolean                     _saw_comma;
     private StringBuffer                _buffer;
@@ -72,7 +71,6 @@ public final class PhylogenyWriter {
     private PhylogenyNode               _root;
     private boolean                     _has_next;
     private Stack<PostOrderStackObject> _stack;
-    private boolean                     _simple_nh;
     private boolean                     _nh_write_distance_to_parent;
     NH_CONVERSION_SUPPORT_VALUE_STYLE   _nh_conversion_support_style;
     private boolean                     _indent_phyloxml;
@@ -113,7 +111,7 @@ public final class PhylogenyWriter {
         if ( tree.getConfidence() != null ) {
             if ( ForesterUtil.isEmpty( tree.getConfidence().getType() ) ) {
                 PhylogenyDataUtil.appendElement( writer, PhyloXmlMapping.CONFIDENCE, tree.getConfidence().getValue()
-                        + "", indentation );
+                                                 + "", indentation );
             }
             PhylogenyDataUtil.appendElement( writer,
                                              PhyloXmlMapping.CONFIDENCE,
@@ -202,10 +200,6 @@ public final class PhylogenyWriter {
         return _saw_comma;
     }
 
-    private boolean isSimpleNH() {
-        return _simple_nh;
-    }
-
     private boolean isWriteDistanceToParentInNH() {
         return _nh_write_distance_to_parent;
     }
@@ -307,10 +301,6 @@ public final class PhylogenyWriter {
         _saw_comma = saw_comma;
     }
 
-    private void setSimpleNH( final boolean simple_nh ) {
-        _simple_nh = simple_nh;
-    }
-
     private void setStack( final Stack<PostOrderStackObject> stack ) {
         _stack = stack;
     }
@@ -324,62 +314,53 @@ public final class PhylogenyWriter {
     }
 
     public void toNewHampshire( final List<Phylogeny> trees,
-                                final boolean simple_nh,
                                 final boolean write_distance_to_parent,
                                 final File out_file,
                                 final String separator ) throws IOException {
         final Iterator<Phylogeny> it = trees.iterator();
         final StringBuffer sb = new StringBuffer();
         while ( it.hasNext() ) {
-            sb.append( toNewHampshire( it.next(), simple_nh, write_distance_to_parent ) );
+            sb.append( toNewHampshire( it.next(), write_distance_to_parent ) );
             sb.append( separator );
         }
         writeToFile( sb, out_file );
     }
 
     public StringBuffer toNewHampshire( final Phylogeny tree,
-                                        final boolean simple_nh,
                                         final boolean nh_write_distance_to_parent,
                                         final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) throws IOException {
         setOutputFormt( FORMAT.NH );
         setNhConversionSupportStyle( svs );
-        setSimpleNH( simple_nh );
         setWriteDistanceToParentInNH( nh_write_distance_to_parent );
         return getOutput( tree );
     }
 
-    public StringBuffer toNewHampshire( final Phylogeny tree,
-                                        final boolean simple_nh,
-                                        final boolean nh_write_distance_to_parent ) throws IOException {
+    public StringBuffer toNewHampshire( final Phylogeny tree, final boolean nh_write_distance_to_parent )
+            throws IOException {
         setOutputFormt( FORMAT.NH );
-        setSimpleNH( simple_nh );
         setWriteDistanceToParentInNH( nh_write_distance_to_parent );
         return getOutput( tree );
     }
 
-    public void toNewHampshire( final Phylogeny tree,
-                                final boolean simple_nh,
-                                final boolean write_distance_to_parent,
-                                final File out_file ) throws IOException {
-        writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent ), out_file );
+    public void toNewHampshire( final Phylogeny tree, final boolean write_distance_to_parent, final File out_file )
+            throws IOException {
+        writeToFile( toNewHampshire( tree, write_distance_to_parent ), out_file );
     }
 
     public void toNewHampshire( final Phylogeny tree,
-                                final boolean simple_nh,
                                 final boolean write_distance_to_parent,
                                 final NH_CONVERSION_SUPPORT_VALUE_STYLE svs,
                                 final File out_file ) throws IOException {
-        writeToFile( toNewHampshire( tree, simple_nh, write_distance_to_parent, svs ), out_file );
+        writeToFile( toNewHampshire( tree, write_distance_to_parent, svs ), out_file );
     }
 
     public void toNewHampshire( final Phylogeny[] trees,
-                                final boolean simple_nh,
                                 final boolean write_distance_to_parent,
                                 final File out_file,
                                 final String separator ) throws IOException {
         final StringBuffer sb = new StringBuffer();
         for( final Phylogeny element : trees ) {
-            sb.append( toNewHampshire( element, simple_nh, write_distance_to_parent ) );
+            sb.append( toNewHampshire( element, write_distance_to_parent ) );
             sb.append( separator );
         }
         writeToFile( sb, out_file );
@@ -417,7 +398,7 @@ public final class PhylogenyWriter {
 
     public void toNexus( final File out_file, final Phylogeny tree, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs )
             throws IOException {
-        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file, ForesterConstants.UTF_8 ) );
         final List<Phylogeny> trees = new ArrayList<Phylogeny>( 1 );
         trees.add( tree );
         writeNexusStart( writer );
@@ -444,14 +425,14 @@ public final class PhylogenyWriter {
                             final List<Phylogeny> trees,
                             final int phyloxml_level,
                             final String separator ) throws IOException {
-        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file, ForesterConstants.UTF_8 ) );
         toPhyloXML( writer, trees, phyloxml_level, separator );
         writer.flush();
         writer.close();
     }
 
     public void toPhyloXML( final File out_file, final Phylogeny tree, final int phyloxml_level ) throws IOException {
-        final Writer writer = new BufferedWriter( new PrintWriter( out_file ) );
+        final Writer writer = new BufferedWriter( new PrintWriter( out_file, ForesterConstants.UTF_8 ) );
         writePhyloXmlStart( writer );
         toPhyloXMLNoPhyloXmlSource( writer, tree, phyloxml_level );
         writePhyloXmlEnd( writer );
@@ -575,9 +556,7 @@ public final class PhylogenyWriter {
             getBuffer().append( node.toNewHampshireX() );
         }
         else if ( getOutputFormt() == FORMAT.NH ) {
-            getBuffer().append( node.toNewHampshire( isSimpleNH(),
-                                                     isWriteDistanceToParentInNH(),
-                                                     getNhConversionSupportStyle() ) );
+            getBuffer().append( node.toNewHampshire( isWriteDistanceToParentInNH(), getNhConversionSupportStyle() ) );
         }
     }
 
@@ -603,7 +582,7 @@ public final class PhylogenyWriter {
                     PhylogenyDataUtil.appendOpen( getWriter(),
                                                   PhyloXmlMapping.CLADE,
                                                   PhyloXmlMapping.NODE_COLLAPSE,
-                                                  "true" );
+                            "true" );
                 }
                 else {
                     PhylogenyDataUtil.appendOpen( getWriter(), PhyloXmlMapping.CLADE );
@@ -653,18 +632,8 @@ public final class PhylogenyWriter {
         if ( out_file.exists() ) {
             throw new IOException( "attempt to overwrite existing file \"" + out_file.getAbsolutePath() + "\"" );
         }
-        final PrintWriter out = new PrintWriter( new FileWriter( out_file ), true );
-        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
-            out.print( PHYLO_XML_VERSION_ENCODING_LINE );
-            out.print( ForesterUtil.LINE_SEPARATOR );
-            out.print( PHYLO_XML_NAMESPACE_LINE );
-            out.print( ForesterUtil.LINE_SEPARATOR );
-        }
+        final PrintWriter out = new PrintWriter( out_file, ForesterConstants.UTF_8 );
         out.print( sb );
-        if ( getOutputFormt() == FORMAT.PHYLO_XML ) {
-            out.print( ForesterUtil.LINE_SEPARATOR );
-            out.print( PHYLO_XML_END );
-        }
         out.flush();
         out.close();
     }
@@ -708,19 +677,19 @@ public final class PhylogenyWriter {
                 else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getCommonName() ) ) {
                     data = node.getNodeData().getTaxonomy().getCommonName();
                 }
-                else if ( node.getNodeData().getTaxonomy().getTaxonomyCode() != null ) {
-                    data = node.getNodeData().getTaxonomy().getTaxonomyCode();
-                }
             }
             else if ( node.getNodeData().isHasSequence() ) {
                 if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) {
                     data = node.getNodeData().getSequence().getName();
                 }
+                else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) {
+                    data = node.getNodeData().getSequence().getSymbol();
+                }
+                else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) {
+                    data = node.getNodeData().getSequence().getGeneName();
+                }
             }
-            if ( data.length() > 0 ) {
-                data = data.replaceAll( " ", "_" );
-            }
-            writer.write( data );
+            writer.write( ForesterUtil.santitizeStringForNH( data ).toString() );
         }
         writer.write( ";" );
         writer.write( ForesterUtil.LINE_SEPARATOR );
@@ -754,7 +723,7 @@ public final class PhylogenyWriter {
             else {
                 writer.write( "[&U]" );
             }
-            writer.write( phylogeny.toNewHampshire( false, svs ) );
+            writer.write( phylogeny.toNewHampshire( svs ) );
             writer.write( ForesterUtil.LINE_SEPARATOR );
             i++;
         }
@@ -778,3 +747,7 @@ public final class PhylogenyWriter {
         NH, NHX, PHYLO_XML, NEXUS;
     }
 }
+
+
+
+