in progress...
author cmzmasek <chris.zma@outlook.com>
Sun, 3 Sep 2017 02:13:54 +0000 (19:13 -0700)
committer cmzmasek <chris.zma@outlook.com>
Sun, 3 Sep 2017 02:13:54 +0000 (19:13 -0700)
forester/java/src/org/forester/application/cladinator.java
forester/java/src/org/forester/application/rid.java [moved from forester/java/src/org/forester/application/serin.java with 96% similarity]
forester/java/src/org/forester/clade_analysis/ResultMulti.java
forester/java/src/org/forester/msa/BasicMsa.java
forester/java/src/org/forester/util/ForesterUtil.java

index 69affd2..c53d102 100644 (file)
@@ -31,6 +31,7 @@ import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
 
 import org.forester.clade_analysis.AnalysisMulti;
 import org.forester.clade_analysis.Prefix;
@@ -41,20 +42,26 @@ import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
 import org.forester.phylogeny.factories.PhylogenyFactory;
 import org.forester.util.CommandLineArguments;
+import org.forester.util.EasyWriter;
 import org.forester.util.ForesterUtil;
 
 public final class cladinator {
 
-    final static private String        PRG_NAME      = "cladinator";
-    final static private String        PRG_VERSION   = "0.100";
-    final static private String        PRG_DATE      = "170823";
-    final static private String        PRG_DESC      = "clades within clades -- analysis of pplacer type outputs";
-    final static private String        E_MAIL        = "phyloxml@gmail.com";
-    final static private String        WWW           = "https://sites.google.com/site/cmzmasek/home/software/forester";
-    final static private String        HELP_OPTION_1 = "help";
-    final static private String        HELP_OPTION_2 = "h";
-    final static private String        SEP_OPTION    = "s";
-    private final static DecimalFormat df2           = new DecimalFormat( "0.0#" );
+    final static private String        PRG_NAME                 = "cladinator";
+    final static private String        PRG_VERSION              = "1.00";
+    final static private String        PRG_DATE                 = "170902";
+    final static private String        PRG_DESC                 = "clades within clades of annotated labels -- analysis of pplacer-type outputs";
+    final static private String        E_MAIL                   = "phyloxml@gmail.com";
+    final static private String        WWW                      = "https://sites.google.com/site/cmzmasek/home/software/forester";
+    final static private String        HELP_OPTION_1            = "help";
+    final static private String        HELP_OPTION_2            = "h";
+    final static private String        SEP_OPTION               = "s";
+    final static private String        QUERY_PATTERN_OPTION     = "q";
+    final static private String        SPECIFICS_CUTOFF_OPTION  = "c";
+    final static private double        SPECIFICS_CUTOFF_DEFAULT = 0.8;
+    final static private String        SEP_DEFAULT              = ".";
+    final static private Pattern       QUERY_PATTERN_DEFAULT    = AnalysisMulti.DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE;
+    private final static DecimalFormat df                       = new DecimalFormat( "0.0#######" );
 
     public static void main( final String args[] ) {
         try {
@@ -77,35 +84,77 @@ public final class cladinator {
                 print_help();
                 System.exit( 0 );
             }
-            else if ( ( ( args.length != 2 ) && ( args.length != 3 ) ) ) {
-                System.out.println();
-                System.out.println( "Wrong number of arguments." );
-                System.out.println();
+            if ( ( cla.getNumberOfNames() != 1 ) && ( cla.getNumberOfNames() != 2 ) ) {
                 print_help();
                 System.exit( -1 );
             }
             final List<String> allowed_options = new ArrayList<>();
             allowed_options.add( SEP_OPTION );
+            allowed_options.add( QUERY_PATTERN_OPTION );
+            allowed_options.add( SPECIFICS_CUTOFF_OPTION );
             final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
             if ( dissallowed_options.length() > 0 ) {
                 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
             }
-            final String separator;
+            double cutoff_specifics = SPECIFICS_CUTOFF_DEFAULT;
+            if ( cla.isOptionSet( SPECIFICS_CUTOFF_OPTION ) ) {
+                if ( cla.isOptionValueSet( SPECIFICS_CUTOFF_OPTION ) ) {
+                    cutoff_specifics = cla.getOptionValueAsDouble( SPECIFICS_CUTOFF_OPTION );
+                    if ( cutoff_specifics < 0 ) {
+                        ForesterUtil.fatalError( PRG_NAME, "cutoff cannot be negative" );
+                    }
+                }
+                else {
+                    ForesterUtil.fatalError( PRG_NAME, "no value for cutoff for specifics" );
+                }
+            }
+            String separator = SEP_DEFAULT;
             if ( cla.isOptionSet( SEP_OPTION ) ) {
-                separator = cla.getOptionValue( SEP_OPTION );
+                if ( cla.isOptionValueSet( SEP_OPTION ) ) {
+                    separator = cla.getOptionValue( SEP_OPTION );
+                }
+                else {
+                    ForesterUtil.fatalError( PRG_NAME, "no value for separator option" );
+                }
             }
-            else {
-                separator = null;
+            Pattern compiled_query_str = null;
+            if ( cla.isOptionSet( QUERY_PATTERN_OPTION ) ) {
+                if ( cla.isOptionValueSet( QUERY_PATTERN_OPTION ) ) {
+                    final String query_str = cla.getOptionValue( QUERY_PATTERN_OPTION );
+                    try {
+                        compiled_query_str = Pattern.compile( query_str );
+                    }
+                    catch ( final PatternSyntaxException e ) {
+                        ForesterUtil.fatalError( PRG_NAME, "error in regular expression: " + e.getMessage() );
+                    }
+                }
+                else {
+                    ForesterUtil.fatalError( PRG_NAME, "no value for query pattern option" );
+                }
             }
+            final Pattern pattern = ( compiled_query_str != null ) ? compiled_query_str : QUERY_PATTERN_DEFAULT;
             final File intreefile = cla.getFile( 0 );
-            final String query = cla.getName( 1 );
-            System.out.println( "Input tree: " + intreefile );
-            System.out.println( "Query     : " + query );
-            if ( !ForesterUtil.isEmpty( separator ) ) {
-                System.out.println( "Separator : " + separator );
+            final String error_intreefile = ForesterUtil.isReadableFile( intreefile );
+            if ( !ForesterUtil.isEmpty( error_intreefile ) ) {
+                ForesterUtil.fatalError( PRG_NAME, error_intreefile );
+            }
+            final File outtablefile;
+            if ( cla.getNumberOfNames() > 1 ) {
+                outtablefile = cla.getFile( 1 );
+                final String error_outtablefile = ForesterUtil.isWritableFile( outtablefile );
+                if ( !ForesterUtil.isEmpty( error_outtablefile ) ) {
+                    ForesterUtil.fatalError( PRG_NAME, error_outtablefile );
+                }
             }
             else {
-                System.out.println( "Separator : none" );
+                outtablefile = null;
+            }
+            System.out.println( "Input tree                 : " + intreefile );
+            System.out.println( "Specific-hit support cutoff: " + cutoff_specifics );
+            System.out.println( "Annotation-separator       : " + separator );
+            System.out.println( "Query pattern              : " + pattern );
+            if ( outtablefile != null ) {
+                System.out.println( "Output table               : " + outtablefile );
             }
             Phylogeny p = null;
             try {
@@ -114,25 +163,48 @@ public final class cladinator {
                 p = factory.create( intreefile, pp )[ 0 ];
             }
             catch ( final IOException e ) {
-                System.out.println( "\nCould not read \"" + intreefile + "\" [" + e.getMessage() + "]\n" );
+                ForesterUtil.fatalError( PRG_NAME, "Could not read \"" + intreefile + "\" [" + e.getMessage() + "]" );
                 System.exit( -1 );
             }
-            final Pattern pattern = Pattern.compile( query );
-            final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, 0.5 );
-            System.out.println();
-            System.out.println( "Result:" );
-            System.out.println( "Query                        : " + query );
-            ///////////////////
-            System.out.println( "Collapsed:" );
+            System.out.println( "Ext. nodes in input tree   : " + p.getNumberOfExternalNodes() );
+            final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, cutoff_specifics );
+            printResult( res );
+            if ( outtablefile != null ) {
+                writeResultToTable( res, outtablefile );
+            }
+        }
+        catch ( final IllegalArgumentException e ) {
+            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        }
+        catch ( final IOException e ) {
+            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
+        }
+    }
+
+    /**
+     * Prints a human-readable summary of a multi-hit analysis result to standard output.
+     * <p>
+     * When the result holds no multi-hit prefixes (the list is null or empty) the single
+     * line "No match to query pattern!" is printed. Otherwise the collapsed prefixes are
+     * listed, followed -- when present -- by the specific hits and by the down-tree and
+     * up-tree bracketing prefixes. The output is framed by blank lines.
+     *
+     * @param res the analysis result to print
+     */
+    private final static void printResult( final ResultMulti res ) {
+        System.out.println();
+        System.out.println( "Result:" );
+        System.out.println();
+        // Short-circuit "||" is required here: with "|" the size() call would still be
+        // evaluated on a null list and the null guard itself would throw.
+        if ( ( res.getAllMultiHitPrefixes() == null ) || ( res.getAllMultiHitPrefixes().size() < 1 ) ) {
+            System.out.println( "No match to query pattern!" );
+        }
+        else {
+            System.out.println( "Matching Clade(s):" );
             for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
                 System.out.println( prefix );
             }
             if ( res.isHasSpecificMultiHitsPrefixes() ) {
-                System.out.println( "Specifics:" );
+                System.out.println();
+                System.out.println( "Specific-hit(s):" );
                 for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) {
                     System.out.println( prefix );
                 }
-                System.out.println( "Collapsed With Specifics:" );
+                System.out.println();
+                System.out.println( "Matching Clade(s) with Specific-hit(s):" );
                 for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
                     System.out.println( prefix );
                     for( final Prefix spec : res.getSpecificMultiHitPrefixes() ) {
@@ -143,40 +215,90 @@ public final class cladinator {
                 }
             }
             if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) {
-                System.out.println( "Collapsed Down:" );
+                System.out.println();
+                System.out.println( "Matching Down-tree Bracketing Clade(s):" );
                 for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) {
                     System.out.println( prefix );
                 }
             }
             if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) {
-                System.out.println( "Collapsed Up:" );
-                for( final Prefix prefix : res.getAllMultiHitPrefixesUp() ) {
+                System.out.println();
+                System.out.println( "Matching Up-tree Bracketing Clade(s):" );
+                for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) {
                     System.out.println( prefix );
                 }
             }
-            ///////////////////
-            System.out.println();
         }
-        catch ( final IllegalArgumentException e ) {
-            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        System.out.println();
+    }
+
+    /**
+     * Writes a multi-hit analysis result as a tab-separated table to a file.
+     * <p>
+     * When the result holds no multi-hit prefixes (the list is null or empty) the file
+     * contains the single line "No match to query pattern!". Otherwise one row is written
+     * per prefix with three tab-separated columns: a category label ("Matching Clades",
+     * "Specific-hits", "Matching Down-tree Bracketing Clades" or
+     * "Matching Up-tree Bracketing Clades"), the prefix string, and the confidence
+     * formatted with {@code df}. The writer is flushed and closed before returning.
+     *
+     * @param res          the analysis result to write
+     * @param outtablefile the file the table is written to
+     * @throws IOException propagated from creating or using the writer
+     */
+    private final static void writeResultToTable( final ResultMulti res, final File outtablefile ) throws IOException {
+        final EasyWriter w = ForesterUtil.createEasyWriter( outtablefile );
+        // Short-circuit "||" is required here: with "|" the size() call would still be
+        // evaluated on a null list and the null guard itself would throw.
+        if ( ( res.getAllMultiHitPrefixes() == null ) || ( res.getAllMultiHitPrefixes().size() < 1 ) ) {
+            w.println( "No match to query pattern!" );
         }
-        catch ( final Exception e ) {
-            e.printStackTrace();
-            ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
+        else {
+            for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
+                w.print( "Matching Clades" );
+                w.print( "\t" );
+                w.print( prefix.getPrefix() );
+                w.print( "\t" );
+                w.print( df.format( prefix.getConfidence() ) );
+                w.println();
+            }
+            if ( res.isHasSpecificMultiHitsPrefixes() ) {
+                for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) {
+                    w.print( "Specific-hits" );
+                    w.print( "\t" );
+                    w.print( prefix.getPrefix() );
+                    w.print( "\t" );
+                    w.print( df.format( prefix.getConfidence() ) );
+                    w.println();
+                }
+            }
+            if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) {
+                for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) {
+                    w.print( "Matching Down-tree Bracketing Clades" );
+                    w.print( "\t" );
+                    w.print( prefix.getPrefix() );
+                    w.print( "\t" );
+                    w.print( df.format( prefix.getConfidence() ) );
+                    w.println();
+                }
+            }
+            if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) {
+                for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) {
+                    w.print( "Matching Up-tree Bracketing Clades" );
+                    w.print( "\t" );
+                    w.print( prefix.getPrefix() );
+                    w.print( "\t" );
+                    w.print( df.format( prefix.getConfidence() ) );
+                    w.println();
+                }
+            }
         }
+        w.flush();
+        w.close();
     }
 
     private final static void print_help() {
         System.out.println( "Usage:" );
         System.out.println();
-        System.out.println( PRG_NAME + " [options] <gene tree file> <query>" );
+        System.out.println( PRG_NAME + " [options] <input tree file> [output table file]" );
         System.out.println();
         System.out.println( " options:" );
-        System.out.println( "  -" + SEP_OPTION + "=<separator>: the separator to be used" );
+        System.out.println( "  -" + SPECIFICS_CUTOFF_OPTION
+                + "=<double>: the cutoff for \"specific-hit\" support values (default: " + SPECIFICS_CUTOFF_DEFAULT
+                + ")" );
+        System.out.println( "  -" + SEP_OPTION + "=<separator>: the annotation-separator to be used (default: "
+                + SEP_DEFAULT + ")" );
+        System.out.println( "  -" + QUERY_PATTERN_OPTION
+                + "=<query pattern>: the regular expression for the query (default: \"" + QUERY_PATTERN_DEFAULT
+                + "\" for pplacer output)" );
         System.out.println();
         System.out.println( "Example:" );
         System.out.println();
-        System.out.println( " " + PRG_NAME + " -s=. my_tree.xml A.1.1.1" );
+        System.out.println( " " + PRG_NAME + " -c=0.5 -s=. my_tree.nh result.tsv" );
         System.out.println();
     }
 }
@@ -23,16 +23,16 @@ import org.forester.util.CommandLineArguments;
 import org.forester.util.ForesterConstants;
 import org.forester.util.ForesterUtil;
 
-public class serin {
+public class rid {
 
-    final static private String PRG_NAME               = "serin";
-    final static private String PRG_DATE               = "170830";
+    final static private String PRG_NAME               = "rid";
+    final static private String PRG_DATE               = "170902";
     final static private String PRG_DESC               = "sequence file reformatting and identifier normalization";
     final static private String PRG_VERSION            = "1.00";
     final static private String WWW                    = "https://sites.google.com/site/cmzmasek/home/software/forester";
     final static private String E_MAIL                 = "phyloxml@gmail.com";
     final static private String OUTPUT_FORMAT_OPTION   = "o";
-    final static private String ID_NORM_OPTION         = "i";
+    final static private String ID_NORM_OPTION         = "s";
     final static private String HELP_OPTION_1          = "help";
     final static private String HELP_OPTION_2          = "h";
     private static final String OUTPUT_FORMAT_FASTA    = "f";
@@ -138,9 +138,12 @@ public class serin {
                         output_format = MSA_FORMAT.NEXUS;
                     }
                     else {
-                        ForesterUtil.fatalError( PRG_NAME, "unknown format option: " + output_format_str );
+                        ForesterUtil.fatalError( PRG_NAME, "unknown output format option: " + output_format_str );
                     }
                 }
+                else {
+                    ForesterUtil.fatalError( PRG_NAME, "no value for output format option"  );
+                }
             }
             final boolean normalize_identifiers;
             if ( cla.isOptionSet( ID_NORM_OPTION ) || ( cla.getNumberOfNames() == 3 ) ) {
@@ -252,7 +255,7 @@ public class serin {
                 System.out.println( "Sequence lenght min   : " + ( int ) stats.getMin() );
                 System.out.println( "Sequence lenght max   : " + ( int ) stats.getMax() );
                 if ( input_seqs.size() > 2 ) {
-                    System.out.println( "Sequence lenght median: " + ( int ) stats.median() );
+                    System.out.println( "Sequence length median: " + ( int ) stats.median() );
                 }
                 if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) {
                     ForesterUtil.fatalError( PRG_NAME,
@@ -283,10 +286,10 @@ public class serin {
                                                                                   seq.getMolecularSequenceAsString() );
                 output_seqs.add( ns );
             }
+            System.out.println();
             if ( normalize_identifiers ) {
                 output_map_writer.flush();
                 output_map_writer.close();
-                System.out.println();
                 System.out.println( "Wrote                 : " + output_map_file );
             }
             final BufferedWriter seq_writer = ForesterUtil.createBufferedWriter( outfile_seqs_file );
@@ -333,11 +336,11 @@ public class serin {
                 + OUTPUT_FORMAT_FASTA + " for Fasta (default), " + OUTPUT_FORMAT_PHYLIP_L + " or "
                 + OUTPUT_FORMAT_PHYLIP + " for Phylip, " + OUTPUT_FORMAT_NEXUS_L + " or " + OUTPUT_FORMAT_NEXUS
                 + " for Nexus" );
-        System.out.println( "  -" + ID_NORM_OPTION + ": to replace sequence names with short(er) identifiers" );
+        System.out.println( "  -" + ID_NORM_OPTION + "         : to replace sequence names with short(er) identifiers" );
         System.out.println();
         System.out.println( "Example:" );
         System.out.println();
-        System.out.println( " " + PRG_NAME + " -i -o=p my_seqs.fasta" );
+        System.out.println( " " + PRG_NAME + " -s -o=p my_seqs.fasta" );
         System.out.println();
     }
 }
index f9822ec..0fcda8b 100644 (file)
@@ -40,9 +40,9 @@ import org.forester.util.ForesterUtil;
 public final class ResultMulti {
 
     private final String       _separator;
-    private final List<Prefix> _greatest_common_prefixes      = new ArrayList<Prefix>();
-    private final List<Prefix> _greatest_common_prefixes_up   = new ArrayList<Prefix>();
-    private final List<Prefix> _greatest_common_prefixes_down = new ArrayList<Prefix>();
+    private final List<Prefix> _greatest_common_prefixes      = new ArrayList<>();
+    private final List<Prefix> _greatest_common_prefixes_up   = new ArrayList<>();
+    private final List<Prefix> _greatest_common_prefixes_down = new ArrayList<>();
     private List<Prefix>       _all                           = null;
     private List<Prefix>       _collapsed                     = null;
     private List<Prefix>       _cleaned_spec                  = null;
@@ -117,14 +117,7 @@ public final class ResultMulti {
     @Override
     public final String toString() {
         final StringBuilder sb = new StringBuilder();
-      //  sb.append( "Cleaned:" );
-      //  sb.append( ForesterUtil.LINE_SEPARATOR );
-      //  for( final Prefix prefix : _all ) {
-       //     sb.append( prefix );
-       //     sb.append( ForesterUtil.LINE_SEPARATOR );
-       // }
-       // sb.append( ForesterUtil.LINE_SEPARATOR );
-        sb.append( "Collapsed:" );
+        sb.append( "Matching Clade(s):" );
         sb.append( ForesterUtil.LINE_SEPARATOR );
         for( final Prefix prefix : _collapsed ) {
             sb.append( prefix );
@@ -132,14 +125,14 @@ public final class ResultMulti {
         }
         if ( _has_specifics ) {
             sb.append( ForesterUtil.LINE_SEPARATOR );
-            sb.append( "Specifics:" );
+            sb.append( "Specific-hit(s):" );
             sb.append( ForesterUtil.LINE_SEPARATOR );
             for( final Prefix prefix : _cleaned_spec ) {
                 sb.append( prefix );
                 sb.append( ForesterUtil.LINE_SEPARATOR );
             }
             sb.append( ForesterUtil.LINE_SEPARATOR );
-            sb.append( "Collapsed With Specifics:" );
+            sb.append( "Matching Clade(s) with Specific-hit(s):" );
             sb.append( ForesterUtil.LINE_SEPARATOR );
             for( final Prefix prefix : _collapsed ) {
                 sb.append( prefix );
@@ -154,79 +147,21 @@ public final class ResultMulti {
         }
         if ( !ForesterUtil.isEmpty( _all_down ) ) {
             sb.append( ForesterUtil.LINE_SEPARATOR );
-        //    sb.append( "Cleaned Down:" );
-        //    sb.append( ForesterUtil.LINE_SEPARATOR );
-        //    for( final Prefix prefix : _all_down ) {
-        //        sb.append( prefix );
-        //        sb.append( ForesterUtil.LINE_SEPARATOR );
-         //   }
-           // sb.append( ForesterUtil.LINE_SEPARATOR );
-            sb.append( "Collapsed Down:" );
+            sb.append( "Matching Down-tree Bracketing Clade(s):" );
             sb.append( ForesterUtil.LINE_SEPARATOR );
             for( final Prefix prefix : _collapsed_down ) {
                 sb.append( prefix );
                 sb.append( ForesterUtil.LINE_SEPARATOR );
             }
-          /*  if ( _has_specifics_down ) {
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                sb.append( "Specifics Down:" );
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                for( final Prefix prefix : _cleaned_spec_down ) {
-                    sb.append( prefix );
-                    sb.append( ForesterUtil.LINE_SEPARATOR );
-                }
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                sb.append( "Collapsed With Specifics Down:" );
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                for( final Prefix prefix : _collapsed_down ) {
-                    sb.append( prefix );
-                    sb.append( ForesterUtil.LINE_SEPARATOR );
-                    for( final Prefix spec : _cleaned_spec_down ) {
-                        if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
-                            sb.append( "    " + spec );
-                            sb.append( ForesterUtil.LINE_SEPARATOR );
-                        }
-                    }
-                }
-            }*/
         }
         if ( !ForesterUtil.isEmpty( _all_up ) ) {
             sb.append( ForesterUtil.LINE_SEPARATOR );
-        //    sb.append( "Cleaned Up:" );
-        //    sb.append( ForesterUtil.LINE_SEPARATOR );
-        //    for( final Prefix prefix : _all_up ) {
-        //        sb.append( prefix );
-         //       sb.append( ForesterUtil.LINE_SEPARATOR );
-         //   }
-         //   sb.append( ForesterUtil.LINE_SEPARATOR );
-            sb.append( "Collapsed Up:" );
+            sb.append( "Matching Up-tree Bracketing Clade(s):" );
             sb.append( ForesterUtil.LINE_SEPARATOR );
             for( final Prefix prefix : _collapsed_up ) {
                 sb.append( prefix );
                 sb.append( ForesterUtil.LINE_SEPARATOR );
             }
-          /*  if ( _has_specifics ) {
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                sb.append( "Specifics Up:" );
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                for( final Prefix prefix : _cleaned_spec_up ) {
-                    sb.append( prefix );
-                    sb.append( ForesterUtil.LINE_SEPARATOR );
-                }
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                sb.append( "Collapsed With Specifics Up:" );
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                for( final Prefix prefix : _collapsed_up ) {
-                    sb.append( prefix );
-                    sb.append( ForesterUtil.LINE_SEPARATOR );
-                    for( final Prefix spec : _cleaned_spec_up ) {
-                        if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
-                            sb.append( "    " + spec );
-                            sb.append( ForesterUtil.LINE_SEPARATOR );
-                        }
-                    }
-                }
-            }*/
         }
         return sb.toString();
     }
@@ -251,17 +186,17 @@ public final class ResultMulti {
     }
 
     private final void reset() {
-        _all = new ArrayList<Prefix>();
-        _collapsed = new ArrayList<Prefix>();
-        _cleaned_spec = new ArrayList<Prefix>();
+        _all = new ArrayList<>();
+        _collapsed = new ArrayList<>();
+        _cleaned_spec = new ArrayList<>();
         _has_specifics = false;
-        _all_up = new ArrayList<Prefix>();
-        _collapsed_up = new ArrayList<Prefix>();
-        _cleaned_spec_up = new ArrayList<Prefix>();
+        _all_up = new ArrayList<>();
+        _collapsed_up = new ArrayList<>();
+        _cleaned_spec_up = new ArrayList<>();
         _has_specifics_up = false;
-        _all_down = new ArrayList<Prefix>();
-        _collapsed_down = new ArrayList<Prefix>();
-        _cleaned_spec_down = new ArrayList<Prefix>();
+        _all_down = new ArrayList<>();
+        _collapsed_down = new ArrayList<>();
+        _cleaned_spec_down = new ArrayList<>();
         _has_specifics_down = false;
     }
 
index 02da257..bf215d8 100644 (file)
@@ -143,7 +143,7 @@ public class BasicMsa implements Msa {
         }
         if ( _identifiers_set.contains( id ) ) {
             throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" + id
-                                                + "]" );
+                    + "]" );
         }
         _identifiers_set.add( id );
         _identifiers[ row ] = id;
@@ -200,13 +200,32 @@ public class BasicMsa implements Msa {
 
     private void writeToNexus( final Writer w ) throws IOException {
         final int max = determineMaxIdLength() + 1;
+        TYPE t = null;
+        for( int row = 0; row < getNumberOfSequences(); ++row ) {
+            t = ForesterUtil.guessMolecularSequenceType( getSequence( row ).getMolecularSequenceAsString() );
+            if ( t != null ) {
+                break;
+            }
+        }
+        String type_str = "Protein";
+        if ( t != null ) {
+            if ( t == TYPE.DNA ) {
+                type_str = "DNA";
+            }
+            else if ( t == TYPE.RNA ) {
+                type_str = "RNA";
+            }
+            else if ( t == TYPE.GENERAL ) {
+                type_str = "Standard";
+            }
+        }
         w.write( "Begin Data;" );
         w.write( ForesterUtil.LINE_SEPARATOR );
         w.write( "   Dimensions NTax=" + getNumberOfSequences() );
         w.write( " NChar=" + getLength() );
         w.write( ";" );
         w.write( ForesterUtil.LINE_SEPARATOR );
-        w.write( "   Format DataType=Protein Interleave=No gap=-;" );
+        w.write( "   Format DataType=" + type_str + " Interleave=No gap=-;" );
         w.write( ForesterUtil.LINE_SEPARATOR );
         w.write( "   Matrix" );
         w.write( ForesterUtil.LINE_SEPARATOR );
index dc89976..5d70578 100644 (file)
@@ -542,15 +542,16 @@ public final class ForesterUtil {
     }
 
     final public static MolecularSequence.TYPE guessMolecularSequenceType( final String mol_seq ) {
-        if ( mol_seq.contains( "L" ) || mol_seq.contains( "I" ) || mol_seq.contains( "E" ) || mol_seq.contains( "H" )
-                || mol_seq.contains( "D" ) || mol_seq.contains( "Q" ) ) {
+        final String s = mol_seq.toUpperCase();
+        if ( s.contains( "L" ) || s.contains( "I" ) || s.contains( "E" ) || s.contains( "H" )
+                || s.contains( "D" ) || s.contains( "Q" ) ) {
             return TYPE.AA;
         }
         else {
-            if ( mol_seq.contains( "T" ) ) {
+            if ( s.contains( "T" ) ) {
                 return TYPE.DNA;
             }
-            else if ( mol_seq.contains( "U" ) ) {
+            else if ( s.contains( "U" ) ) {
                 return TYPE.RNA;
             }
         }