From 50c2ff100214fbb04b5f9665466e6d98d26b051d Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Sat, 2 Sep 2017 19:13:54 -0700 Subject: [PATCH] in progress... --- .../src/org/forester/application/cladinator.java | 216 +++++++++++++++----- .../forester/application/{serin.java => rid.java} | 21 +- .../org/forester/clade_analysis/ResultMulti.java | 99 ++------- forester/java/src/org/forester/msa/BasicMsa.java | 23 ++- .../java/src/org/forester/util/ForesterUtil.java | 9 +- 5 files changed, 224 insertions(+), 144 deletions(-) rename forester/java/src/org/forester/application/{serin.java => rid.java} (96%) diff --git a/forester/java/src/org/forester/application/cladinator.java b/forester/java/src/org/forester/application/cladinator.java index 69affd2..c53d102 100644 --- a/forester/java/src/org/forester/application/cladinator.java +++ b/forester/java/src/org/forester/application/cladinator.java @@ -31,6 +31,7 @@ import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; import org.forester.clade_analysis.AnalysisMulti; import org.forester.clade_analysis.Prefix; @@ -41,20 +42,26 @@ import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.util.CommandLineArguments; +import org.forester.util.EasyWriter; import org.forester.util.ForesterUtil; public final class cladinator { - final static private String PRG_NAME = "cladinator"; - final static private String PRG_VERSION = "0.100"; - final static private String PRG_DATE = "170823"; - final static private String PRG_DESC = "clades within clades -- analysis of pplacer type outputs"; - final static private String E_MAIL = "phyloxml@gmail.com"; - final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String SEP_OPTION = "s"; - private final static DecimalFormat df2 = new DecimalFormat( "0.0#" ); + final static private String PRG_NAME = "cladinator"; + final static private String PRG_VERSION = "1.00"; + final static private String PRG_DATE = "170902"; + final static private String PRG_DESC = "clades within clades of annotated labels -- analysis of pplacer-type outputs"; + final static private String E_MAIL = "phyloxml@gmail.com"; + final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String SEP_OPTION = "s"; + final static private String QUERY_PATTERN_OPTION = "q"; + final static private String SPECIFICS_CUTOFF_OPTION = "c"; + final static private double SPECIFICS_CUTOFF_DEFAULT = 0.8; + final static private String SEP_DEFAULT = "."; + final static private Pattern QUERY_PATTERN_DEFAULT = AnalysisMulti.DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE; + private final static DecimalFormat df = new DecimalFormat( "0.0#######" ); public static void main( final String args[] ) { try { @@ -77,35 +84,77 @@ public final class cladinator { print_help(); System.exit( 0 ); } - else if ( ( ( args.length != 2 ) && ( args.length != 3 ) ) ) { - System.out.println(); - System.out.println( "Wrong number of arguments." ); - System.out.println(); + if ( ( cla.getNumberOfNames() != 1 ) && ( cla.getNumberOfNames() != 2 ) ) { print_help(); System.exit( -1 ); } final List allowed_options = new ArrayList<>(); allowed_options.add( SEP_OPTION ); + allowed_options.add( QUERY_PATTERN_OPTION ); + allowed_options.add( SPECIFICS_CUTOFF_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options ); } - final String separator; + double cutoff_specifics = SPECIFICS_CUTOFF_DEFAULT; + if ( cla.isOptionSet( SPECIFICS_CUTOFF_OPTION ) ) { + if ( cla.isOptionValueSet( SPECIFICS_CUTOFF_OPTION ) ) { + cutoff_specifics = cla.getOptionValueAsDouble( SPECIFICS_CUTOFF_OPTION ); + if ( cutoff_specifics < 0 ) { + ForesterUtil.fatalError( PRG_NAME, "cutoff cannot be negative" ); + } + } + else { + ForesterUtil.fatalError( PRG_NAME, "no value for cutoff for specifics" ); + } + } + String separator = SEP_DEFAULT; if ( cla.isOptionSet( SEP_OPTION ) ) { - separator = cla.getOptionValue( SEP_OPTION ); + if ( cla.isOptionValueSet( SEP_OPTION ) ) { + separator = cla.getOptionValue( SEP_OPTION ); + } + else { + ForesterUtil.fatalError( PRG_NAME, "no value for separator option" ); + } } - else { - separator = null; + Pattern compiled_query_str = null; + if ( cla.isOptionSet( QUERY_PATTERN_OPTION ) ) { + if ( cla.isOptionValueSet( QUERY_PATTERN_OPTION ) ) { + final String query_str = cla.getOptionValue( QUERY_PATTERN_OPTION ); + try { + compiled_query_str = Pattern.compile( query_str ); + } + catch ( final PatternSyntaxException e ) { + ForesterUtil.fatalError( PRG_NAME, "error in regular expression: " + e.getMessage() ); + } + } + else { + ForesterUtil.fatalError( PRG_NAME, "no value for query pattern option" ); + } } + final Pattern pattern = ( compiled_query_str != null ) ? compiled_query_str : QUERY_PATTERN_DEFAULT; final File intreefile = cla.getFile( 0 ); - final String query = cla.getName( 1 ); - System.out.println( "Input tree: " + intreefile ); - System.out.println( "Query : " + query ); - if ( !ForesterUtil.isEmpty( separator ) ) { - System.out.println( "Separator : " + separator ); + final String error_intreefile = ForesterUtil.isReadableFile( intreefile ); + if ( !ForesterUtil.isEmpty( error_intreefile ) ) { + ForesterUtil.fatalError( PRG_NAME, error_intreefile ); + } + final File outtablefile; + if ( cla.getNumberOfNames() > 1 ) { + outtablefile = cla.getFile( 1 ); + final String error_outtablefile = ForesterUtil.isWritableFile( outtablefile ); + if ( !ForesterUtil.isEmpty( error_outtablefile ) ) { + ForesterUtil.fatalError( PRG_NAME, error_outtablefile ); + } } else { - System.out.println( "Separator : none" ); + outtablefile = null; + } + System.out.println( "Input tree : " + intreefile ); + System.out.println( "Specific-hit support cutoff: " + cutoff_specifics ); + System.out.println( "Annotation-separator : " + separator ); + System.out.println( "Query pattern : " + pattern ); + if ( outtablefile != null ) { + System.out.println( "Output table : " + outtablefile ); } Phylogeny p = null; try { @@ -114,25 +163,48 @@ public final class cladinator { p = factory.create( intreefile, pp )[ 0 ]; } catch ( final IOException e ) { - System.out.println( "\nCould not read \"" + intreefile + "\" [" + e.getMessage() + "]\n" ); + ForesterUtil.fatalError( PRG_NAME, "Could not read \"" + intreefile + "\" [" + e.getMessage() + "]" ); System.exit( -1 ); } - final Pattern pattern = Pattern.compile( query ); - final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, 0.5 ); - System.out.println(); - System.out.println( "Result:" ); - System.out.println( "Query : " + query ); - /////////////////// - System.out.println( "Collapsed:" ); + System.out.println( "Ext. nodes in input tree : " + p.getNumberOfExternalNodes() ); + final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, cutoff_specifics ); + printResult( res ); + if ( outtablefile != null ) { + writeResultToTable( res, outtablefile ); + } + } + catch ( final IllegalArgumentException e ) { + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + } + catch ( final Exception e ) { + e.printStackTrace(); + ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" ); + } + } + + private final static void printResult( final ResultMulti res ) { + System.out.println(); + System.out.println( "Result:" ); + System.out.println(); + if ( ( res.getAllMultiHitPrefixes() == null ) | ( res.getAllMultiHitPrefixes().size() < 1 ) ) { + System.out.println( "No match to query pattern!" ); + } + else { + System.out.println( "Matching Clade(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { System.out.println( prefix ); } if ( res.isHasSpecificMultiHitsPrefixes() ) { - System.out.println( "Specifics:" ); + System.out.println(); + System.out.println( "Specific-hit(s):" ); for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) { System.out.println( prefix ); } - System.out.println( "Collapsed With Specifics:" ); + System.out.println(); + System.out.println( "Matching Clade(s) with Specific-hit(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { System.out.println( prefix ); for( final Prefix spec : res.getSpecificMultiHitPrefixes() ) { @@ -143,40 +215,90 @@ public final class cladinator { } } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) { - System.out.println( "Collapsed Down:" ); + System.out.println(); + System.out.println( "Matching Down-tree Bracketing Clade(s):" ); for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) { System.out.println( prefix ); } } if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) { - System.out.println( "Collapsed Up:" ); - for( final Prefix prefix : res.getAllMultiHitPrefixesUp() ) { + System.out.println(); + System.out.println( "Matching Up-tree Bracketing Clade(s):" ); + for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) { System.out.println( prefix ); } } - /////////////////// - System.out.println(); } - catch ( final IllegalArgumentException e ) { - ForesterUtil.fatalError( PRG_NAME, e.getMessage() ); + System.out.println(); + } + + private final static void writeResultToTable( final ResultMulti res, final File outtablefile ) throws IOException { + final EasyWriter w = ForesterUtil.createEasyWriter( outtablefile ); + if ( ( res.getAllMultiHitPrefixes() == null ) | ( res.getAllMultiHitPrefixes().size() < 1 ) ) { + w.println( "No match to query pattern!" ); } - catch ( final Exception e ) { - e.printStackTrace(); - ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" ); + else { + for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) { + w.print( "Matching Clades" ); + w.print( "\t" ); + w.print( prefix.getPrefix() ); + w.print( "\t" ); + w.print( df.format( prefix.getConfidence() ) ); + w.println(); + } + if ( res.isHasSpecificMultiHitsPrefixes() ) { + for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) { + w.print( "Specific-hits" ); + w.print( "\t" ); + w.print( prefix.getPrefix() ); + w.print( "\t" ); + w.print( df.format( prefix.getConfidence() ) ); + w.println(); + } + } + if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) { + for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) { + w.print( "Matching Down-tree Bracketing Clades" ); + w.print( "\t" ); + w.print( prefix.getPrefix() ); + w.print( "\t" ); + w.print( df.format( prefix.getConfidence() ) ); + w.println(); + } + } + if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) { + for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) { + w.print( "Matching Up-tree Bracketing Clades" ); + w.print( "\t" ); + w.print( prefix.getPrefix() ); + w.print( "\t" ); + w.print( df.format( prefix.getConfidence() ) ); + w.println(); + } + } } + w.flush(); + w.close(); } private final static void print_help() { System.out.println( "Usage:" ); System.out.println(); - System.out.println( PRG_NAME + " [options] " ); + System.out.println( PRG_NAME + " [options] [output table file]" ); System.out.println(); System.out.println( " options:" ); - System.out.println( " -" + SEP_OPTION + "=: the separator to be used" ); + System.out.println( " -" + SPECIFICS_CUTOFF_OPTION + + "=: the cutoff for \"specific-hit\" support values (default: " + SPECIFICS_CUTOFF_DEFAULT + + ")" ); + System.out.println( " -" + SEP_OPTION + "=: the annotation-separator to be used (default: " + + SEP_DEFAULT + ")" ); + System.out.println( " -" + QUERY_PATTERN_OPTION + + "=: the regular expression for the query (default: \"" + QUERY_PATTERN_DEFAULT + + "\" for pplacer output)" ); System.out.println(); System.out.println( "Example:" ); System.out.println(); - System.out.println( " " + PRG_NAME + " -s=. my_tree.xml A.1.1.1" ); + System.out.println( " " + PRG_NAME + " -c=0.5 -s=. my_tree.nh result.tsv" ); System.out.println(); } } diff --git a/forester/java/src/org/forester/application/serin.java b/forester/java/src/org/forester/application/rid.java similarity index 96% rename from forester/java/src/org/forester/application/serin.java rename to forester/java/src/org/forester/application/rid.java index a9ee157..9d25d8d 100644 --- a/forester/java/src/org/forester/application/serin.java +++ b/forester/java/src/org/forester/application/rid.java @@ -23,16 +23,16 @@ import org.forester.util.CommandLineArguments; import org.forester.util.ForesterConstants; import org.forester.util.ForesterUtil; -public class serin { +public class rid { - final static private String PRG_NAME = "serin"; - final static private String PRG_DATE = "170830"; + final static private String PRG_NAME = "rid"; + final static private String PRG_DATE = "170902"; final static private String PRG_DESC = "sequence file reformatting and identifier normalization"; final static private String PRG_VERSION = "1.00"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; final static private String E_MAIL = "phyloxml@gmail.com"; final static private String OUTPUT_FORMAT_OPTION = "o"; - final static private String ID_NORM_OPTION = "i"; + final static private String ID_NORM_OPTION = "s"; final static private String HELP_OPTION_1 = "help"; final static private String HELP_OPTION_2 = "h"; private static final String OUTPUT_FORMAT_FASTA = "f"; @@ -138,9 +138,12 @@ public class serin { output_format = MSA_FORMAT.NEXUS; } else { - ForesterUtil.fatalError( PRG_NAME, "unknown format option: " + output_format_str ); + ForesterUtil.fatalError( PRG_NAME, "unknown output format option: " + output_format_str ); } } + else { + ForesterUtil.fatalError( PRG_NAME, "no value for output format option" ); + } } final boolean normalize_identifiers; if ( cla.isOptionSet( ID_NORM_OPTION ) || ( cla.getNumberOfNames() == 3 ) ) { @@ -252,7 +255,7 @@ public class serin { System.out.println( "Sequence lenght min : " + ( int ) stats.getMin() ); System.out.println( "Sequence lenght max : " + ( int ) stats.getMax() ); if ( input_seqs.size() > 2 ) { - System.out.println( "Sequence lenght median: " + ( int ) stats.median() ); + System.out.println( "Sequence length median: " + ( int ) stats.median() ); } if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) { ForesterUtil.fatalError( PRG_NAME, @@ -283,10 +286,10 @@ public class serin { seq.getMolecularSequenceAsString() ); output_seqs.add( ns ); } + System.out.println(); if ( normalize_identifiers ) { output_map_writer.flush(); output_map_writer.close(); - System.out.println(); System.out.println( "Wrote : " + output_map_file ); } final BufferedWriter seq_writer = ForesterUtil.createBufferedWriter( outfile_seqs_file ); @@ -333,11 +336,11 @@ public class serin { + OUTPUT_FORMAT_FASTA + " for Fasta (default), " + OUTPUT_FORMAT_PHYLIP_L + " or " + OUTPUT_FORMAT_PHYLIP + " for Phylip, " + OUTPUT_FORMAT_NEXUS_L + " or " + OUTPUT_FORMAT_NEXUS + " for Nexus" ); - System.out.println( " -" + ID_NORM_OPTION + ": to replace sequence names with short(er) identifiers" ); + System.out.println( " -" + ID_NORM_OPTION + " : to replace sequence names with short(er) identifiers" ); System.out.println(); System.out.println( "Example:" ); System.out.println(); - System.out.println( " " + PRG_NAME + " -i -o=p my_seqs.fasta" ); + System.out.println( " " + PRG_NAME + " -s -o=p my_seqs.fasta" ); System.out.println(); } } diff --git a/forester/java/src/org/forester/clade_analysis/ResultMulti.java b/forester/java/src/org/forester/clade_analysis/ResultMulti.java index f9822ec..0fcda8b 100644 --- a/forester/java/src/org/forester/clade_analysis/ResultMulti.java +++ b/forester/java/src/org/forester/clade_analysis/ResultMulti.java @@ -40,9 +40,9 @@ import org.forester.util.ForesterUtil; public final class ResultMulti { private final String _separator; - private final List _greatest_common_prefixes = new ArrayList(); - private final List _greatest_common_prefixes_up = new ArrayList(); - private final List _greatest_common_prefixes_down = new ArrayList(); + private final List _greatest_common_prefixes = new ArrayList<>(); + private final List _greatest_common_prefixes_up = new ArrayList<>(); + private final List _greatest_common_prefixes_down = new ArrayList<>(); private List _all = null; private List _collapsed = null; private List _cleaned_spec = null; @@ -117,14 +117,7 @@ public final class ResultMulti { @Override public final String toString() { final StringBuilder sb = new StringBuilder(); - // sb.append( "Cleaned:" ); - // sb.append( ForesterUtil.LINE_SEPARATOR ); - // for( final Prefix prefix : _all ) { - // sb.append( prefix ); - // sb.append( ForesterUtil.LINE_SEPARATOR ); - // } - // sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Collapsed:" ); + sb.append( "Matching Clade(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed ) { sb.append( prefix ); @@ -132,14 +125,14 @@ public final class ResultMulti { } if ( _has_specifics ) { sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Specifics:" ); + sb.append( "Specific-hit(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _cleaned_spec ) { sb.append( prefix ); sb.append( ForesterUtil.LINE_SEPARATOR ); } sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Collapsed With Specifics:" ); + sb.append( "Matching Clade(s) with Specific-hit(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed ) { sb.append( prefix ); @@ -154,79 +147,21 @@ public final class ResultMulti { } if ( !ForesterUtil.isEmpty( _all_down ) ) { sb.append( ForesterUtil.LINE_SEPARATOR ); - // sb.append( "Cleaned Down:" ); - // sb.append( ForesterUtil.LINE_SEPARATOR ); - // for( final Prefix prefix : _all_down ) { - // sb.append( prefix ); - // sb.append( ForesterUtil.LINE_SEPARATOR ); - // } - // sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Collapsed Down:" ); + sb.append( "Matching Down-tree Bracketing Clade(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed_down ) { sb.append( prefix ); sb.append( ForesterUtil.LINE_SEPARATOR ); } - /* if ( _has_specifics_down ) { - sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Specifics Down:" ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - for( final Prefix prefix : _cleaned_spec_down ) { - sb.append( prefix ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - } - sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Collapsed With Specifics Down:" ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - for( final Prefix prefix : _collapsed_down ) { - sb.append( prefix ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - for( final Prefix spec : _cleaned_spec_down ) { - if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) { - sb.append( " " + spec ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - } - } - } - }*/ } if ( !ForesterUtil.isEmpty( _all_up ) ) { sb.append( ForesterUtil.LINE_SEPARATOR ); - // sb.append( "Cleaned Up:" ); - // sb.append( ForesterUtil.LINE_SEPARATOR ); - // for( final Prefix prefix : _all_up ) { - // sb.append( prefix ); - // sb.append( ForesterUtil.LINE_SEPARATOR ); - // } - // sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Collapsed Up:" ); + sb.append( "Matching Up-tree Bracketing Clade(s):" ); sb.append( ForesterUtil.LINE_SEPARATOR ); for( final Prefix prefix : _collapsed_up ) { sb.append( prefix ); sb.append( ForesterUtil.LINE_SEPARATOR ); } - /* if ( _has_specifics ) { - sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Specifics Up:" ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - for( final Prefix prefix : _cleaned_spec_up ) { - sb.append( prefix ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - } - sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( "Collapsed With Specifics Up:" ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - for( final Prefix prefix : _collapsed_up ) { - sb.append( prefix ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - for( final Prefix spec : _cleaned_spec_up ) { - if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) { - sb.append( " " + spec ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - } - } - } - }*/ } return sb.toString(); } @@ -251,17 +186,17 @@ public final class ResultMulti { } private final void reset() { - _all = new ArrayList(); - _collapsed = new ArrayList(); - _cleaned_spec = new ArrayList(); + _all = new ArrayList<>(); + _collapsed = new ArrayList<>(); + _cleaned_spec = new ArrayList<>(); _has_specifics = false; - _all_up = new ArrayList(); - _collapsed_up = new ArrayList(); - _cleaned_spec_up = new ArrayList(); + _all_up = new ArrayList<>(); + _collapsed_up = new ArrayList<>(); + _cleaned_spec_up = new ArrayList<>(); _has_specifics_up = false; - _all_down = new ArrayList(); - _collapsed_down = new ArrayList(); - _cleaned_spec_down = new ArrayList(); + _all_down = new ArrayList<>(); + _collapsed_down = new ArrayList<>(); + _cleaned_spec_down = new ArrayList<>(); _has_specifics_down = false; } diff --git a/forester/java/src/org/forester/msa/BasicMsa.java b/forester/java/src/org/forester/msa/BasicMsa.java index 02da257..bf215d8 100644 --- a/forester/java/src/org/forester/msa/BasicMsa.java +++ b/forester/java/src/org/forester/msa/BasicMsa.java @@ -143,7 +143,7 @@ public class BasicMsa implements Msa { } if ( _identifiers_set.contains( id ) ) { throw new IllegalArgumentException( "illegal attempt to create msa with non-unique identifiers [" + id - + "]" ); + + "]" ); } _identifiers_set.add( id ); _identifiers[ row ] = id; @@ -200,13 +200,32 @@ public class BasicMsa implements Msa { private void writeToNexus( final Writer w ) throws IOException { final int max = determineMaxIdLength() + 1; + TYPE t = null; + for( int row = 0; row < getNumberOfSequences(); ++row ) { + t = ForesterUtil.guessMolecularSequenceType( getSequence( row ).getMolecularSequenceAsString() ); + if ( t != null ) { + break; + } + } + String type_str = "Protein"; + if ( t != null ) { + if ( t == TYPE.DNA ) { + type_str = "DNA"; + } + else if ( t == TYPE.RNA ) { + type_str = "RNA"; + } + else if ( t == TYPE.GENERAL ) { + type_str = "Standard"; + } + } w.write( "Begin Data;" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( " Dimensions NTax=" + getNumberOfSequences() ); w.write( " NChar=" + getLength() ); w.write( ";" ); w.write( ForesterUtil.LINE_SEPARATOR ); - w.write( " Format DataType=Protein Interleave=No gap=-;" ); + w.write( " Format DataType=" + type_str + " Interleave=No gap=-;" ); w.write( ForesterUtil.LINE_SEPARATOR ); w.write( " Matrix" ); w.write( ForesterUtil.LINE_SEPARATOR ); diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index dc89976..5d70578 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -542,15 +542,16 @@ public final class ForesterUtil { } final public static MolecularSequence.TYPE guessMolecularSequenceType( final String mol_seq ) { - if ( mol_seq.contains( "L" ) || mol_seq.contains( "I" ) || mol_seq.contains( "E" ) || mol_seq.contains( "H" ) - || mol_seq.contains( "D" ) || mol_seq.contains( "Q" ) ) { + final String s = mol_seq.toUpperCase(); + if ( s.contains( "L" ) || s.contains( "I" ) || s.contains( "E" ) || s.contains( "H" ) + || s.contains( "D" ) || s.contains( "Q" ) ) { return TYPE.AA; } else { - if ( mol_seq.contains( "T" ) ) { + if ( s.contains( "T" ) ) { return TYPE.DNA; } - else if ( mol_seq.contains( "U" ) ) { + else if ( s.contains( "U" ) ) { return TYPE.RNA; } } -- 1.7.10.2