import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Pattern;
import org.forester.clade_analysis.AnalysisMulti;
-import org.forester.clade_analysis.AnalysisSingle;
import org.forester.clade_analysis.Prefix;
import org.forester.clade_analysis.ResultMulti;
-import org.forester.clade_analysis.ResultSingle;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
print_help();
System.exit( 0 );
}
- else if ( ( args.length != 2 && args.length != 3 ) ) {
+ else if ( ( ( args.length != 2 ) && ( args.length != 3 ) ) ) {
System.out.println();
System.out.println( "Wrong number of arguments." );
System.out.println();
print_help();
System.exit( -1 );
}
- final List<String> allowed_options = new ArrayList<String>();
+ final List<String> allowed_options = new ArrayList<>();
allowed_options.add( SEP_OPTION );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
System.out.println( "\nCould not read \"" + intreefile + "\" [" + e.getMessage() + "]\n" );
System.exit( -1 );
}
-
- final ResultMulti res = AnalysisMulti.execute( p, query, separator, 0.5 );
-
+ final Pattern pattern = Pattern.compile( query );
+ final ResultMulti res = AnalysisMulti.execute( p, pattern, separator, 0.5 );
System.out.println();
System.out.println( "Result:" );
System.out.println( "Query : " + query );
-
///////////////////
-
-
-
System.out.println( "Collapsed:" );
-
- for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
- System.out.println( prefix );
- }
- if ( _has_specifics ) {
-
- System.out.println( "Specifics:" );
-
- for( final Prefix prefix : _cleaned_spec ) {
- System.out.println( prefix );
-
- }
-
- System.out.println( "Collapsed With Specifics:" );
-
- for( final Prefix prefix : _collapsed ) {
- System.out.println( prefix );
-
- for( final Prefix spec : _cleaned_spec ) {
- if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
- System.out.println( " " + spec );
-
- }
- }
- }
- }
- if ( !ForesterUtil.isEmpty( _all_down ) ) {
-
- System.out.println( "Collapsed Down:" );
-
- for( final Prefix prefix : _collapsed_down ) {
- System.out.println( prefix );
-
- }
-
- }
- if ( !ForesterUtil.isEmpty( _all_up ) ) {
-
-
- System.out.println( "Collapsed Up:" );
-
- for( final Prefix prefix : _collapsed_up ) {
- System.out.println( prefix );
-
- }
-
- }
-
- ///////////////////
-
-
- System.out.print( "Greatest Common Prefix : " + res.getGreatestCommonPrefix() );
- if ( !ForesterUtil.isEmpty( res.getGreatestCommonPrefix() )
- && !ForesterUtil.isEmpty( res.getGreatestCommonCladeSubtreeConfidence() ) ) {
- System.out.println( "\t(" + res.getGreatestCommonCladeSubtreeConfidence() + ")" );
- }
- else {
- System.out.println();
- }
- System.out.print( "Greatest Common Prefix Up : " + res.getGreatestCommonPrefixUp() );
- if ( !ForesterUtil.isEmpty( res.getGreatestCommonPrefixUp() )
- && !ForesterUtil.isEmpty( res.getGreatestCommonCladeUpSubtreeConfidence() ) ) {
- System.out.println( "\t(" + res.getGreatestCommonCladeUpSubtreeConfidence() + ")" );
- }
- else {
- System.out.println();
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
+ System.out.println( prefix );
}
- System.out.print( "Greatest Common Prefix Down : " + res.getGreatestCommonPrefixDown() );
- if ( !ForesterUtil.isEmpty( res.getGreatestCommonPrefixDown() )
- && !ForesterUtil.isEmpty( res.getGreatestCommonCladeDownSubtreeConfidence() ) ) {
- System.out.println( "\t(" + res.getGreatestCommonCladeDownSubtreeConfidence() + ")" );
+ if ( res.isHasSpecificMultiHitsPrefixes() ) {
+ System.out.println( "Specifics:" );
+ for( final Prefix prefix : res.getSpecificMultiHitPrefixes() ) {
+ System.out.println( prefix );
+ }
+ System.out.println( "Collapsed With Specifics:" );
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
+ System.out.println( prefix );
+ for( final Prefix spec : res.getSpecificMultiHitPrefixes() ) {
+ if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
+ System.out.println( " " + spec );
+ }
+ }
+ }
}
- else {
- System.out.println();
+ if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesDown() ) ) {
+ System.out.println( "Collapsed Down:" );
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixesDown() ) {
+ System.out.println( prefix );
+ }
}
- System.out.println( "Least Encompassing Clade size: " + res.getLeastEncompassingCladeSize()
- + " external nodes" );
- final double lec_ratio = ( 100.0 * res.getLeastEncompassingCladeSize() ) / res.getTreeSize();
- System.out.println( "Least Encompassing Clade size: " + df2.format( lec_ratio ) + "%" );
- System.out.println( "Total tree size : " + res.getTreeSize() + " external nodes" );
- if ( res.getWarnings().size() > 0 ) {
- System.out.println( "Warnings:" );
- for( final String s : res.getWarnings() ) {
- System.out.println( s );
+ if ( !ForesterUtil.isEmpty( res.getAllMultiHitPrefixesUp() ) ) {
+ System.out.println( "Collapsed Up:" );
+ // Print the collapsed list under the "Collapsed Up:" heading, mirroring the
+ // "Collapsed Down:" section; the "all" list is only used to decide whether there is
+ // anything to report.
+ // NOTE(review): assumes ResultMulti exposes getCollapsedMultiHitPrefixesUp() as the
+ // counterpart of getCollapsedMultiHitPrefixesDown() -- confirm.
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixesUp() ) {
+ System.out.println( prefix );
}
}
+ ///////////////////
System.out.println();
}
catch ( final IllegalArgumentException e ) {
--- /dev/null
+
+package org.forester.application;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.FastaParser;
+import org.forester.io.parsers.GeneralMsaParser;
+import org.forester.io.writers.SequenceWriter;
+import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
+import org.forester.msa.BasicMsa;
+import org.forester.msa.Msa;
+import org.forester.msa.Msa.MSA_FORMAT;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.MolecularSequence;
+import org.forester.util.BasicDescriptiveStatistics;
+import org.forester.util.CommandLineArguments;
+import org.forester.util.ForesterConstants;
+import org.forester.util.ForesterUtil;
+
+/**
+ * Command-line tool which reads a sequence file (Fasta, or otherwise parsed as a general MSA),
+ * optionally replaces the sequence names with short hexadecimal identifiers, and writes the
+ * sequences out again in Fasta, Phylip, or Nexus format.
+ * <p>
+ * When identifiers are replaced, a map file with one "new identifier TAB original name" line
+ * per sequence is written as well.
+ */
+public class serin {
+
+    final static private String PRG_NAME = "serin";
+    final static private String PRG_DATE = "170830";
+    final static private String PRG_DESC = "sequence file reformatting and identifier normalization";
+    final static private String PRG_VERSION = "1.00";
+    final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
+    final static private String E_MAIL = "phyloxml@gmail.com";
+    final static private String OUTPUT_FORMAT_OPTION = "o";
+    final static private String ID_NORM_OPTION = "i";
+    final static private String HELP_OPTION_1 = "help";
+    final static private String HELP_OPTION_2 = "h";
+    private static final String OUTPUT_FORMAT_FASTA = "f";
+    private static final String OUTPUT_FORMAT_PHYLIP = "p";
+    private static final String OUTPUT_FORMAT_NEXUS = "n";
+    private static final String OUTPUT_FORMAT_FASTA_L = "fasta";
+    private static final String OUTPUT_FORMAT_PHYLIP_L = "phylip";
+    private static final String OUTPUT_FORMAT_NEXUS_L = "nexus";
+    // Input file name suffixes (matched ignoring case) which are removed before default
+    // output file names are derived from the input file name.
+    private static final String[] KNOWN_INPUT_SUFFIXES = { ".fasta", ".fsa", ".phy", ".aln", ".phylip", ".nex",
+            ".nexus" };
+
+    /**
+     * Program entry point.
+     * <p>
+     * Accepts one to three names: the input sequences file, optionally the output sequences
+     * file, and optionally the output map file. With a single name, the output file names are
+     * derived from the input file name. Identifiers are normalized if the
+     * {@code -i} option is set or if three names are given.
+     *
+     * @param args the command line arguments
+     */
+    public static void main( final String args[] ) {
+        try {
+            ForesterUtil.printProgramInformation( PRG_NAME,
+                                                  PRG_DESC,
+                                                  PRG_VERSION,
+                                                  PRG_DATE,
+                                                  E_MAIL,
+                                                  WWW,
+                                                  ForesterUtil.getForesterLibraryInformation() );
+            CommandLineArguments cla = null;
+            try {
+                cla = new CommandLineArguments( args );
+            }
+            catch ( final Exception e ) {
+                ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+            }
+            if ( ( cla.getNumberOfNames() == 0 ) || cla.isOptionSet( HELP_OPTION_1 )
+                    || cla.isOptionSet( HELP_OPTION_2 ) ) {
+                System.out.println();
+                print_help();
+                System.exit( 0 );
+            }
+            String input_seqs_file_str = null;
+            String output_seqs_file_str = null;
+            String output_map_file_str = null;
+            String input_seqs_name_wo_suffix = null;
+            if ( ( cla.getNumberOfNames() == 2 ) || ( cla.getNumberOfNames() == 3 ) ) {
+                // Output (and possibly map) file names are given explicitly.
+                input_seqs_file_str = cla.getName( 0 );
+                output_seqs_file_str = cla.getName( 1 );
+                if ( cla.getNumberOfNames() == 3 ) {
+                    output_map_file_str = cla.getName( 2 );
+                }
+            }
+            else if ( cla.getNumberOfNames() == 1 ) {
+                // Only the input is given: derive the other names from the input name
+                // without its (known) suffix.
+                input_seqs_file_str = cla.getName( 0 );
+                input_seqs_name_wo_suffix = strip_known_suffix( input_seqs_file_str );
+                output_map_file_str = input_seqs_name_wo_suffix + ForesterConstants.ID_MAP_FILE_SUFFIX;
+            }
+            else {
+                print_help();
+                System.exit( -1 );
+            }
+            final List<String> allowed_options = new ArrayList<>();
+            allowed_options.add( OUTPUT_FORMAT_OPTION );
+            allowed_options.add( ID_NORM_OPTION );
+            final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
+            if ( dissallowed_options.length() > 0 ) {
+                ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
+            }
+            final File input_seqs_file = new File( input_seqs_file_str );
+            final String error0 = ForesterUtil.isReadableFile( input_seqs_file );
+            if ( !ForesterUtil.isEmpty( error0 ) ) {
+                ForesterUtil.fatalError( PRG_NAME, error0 );
+            }
+            final boolean input_seqs_fasta_like = ForesterUtil.isLooksLikeFasta( input_seqs_file );
+            // Fasta is the default output format; it is kept if the option carries no value.
+            Msa.MSA_FORMAT output_format = MSA_FORMAT.FASTA;
+            if ( cla.isOptionSet( OUTPUT_FORMAT_OPTION ) ) {
+                if ( cla.isOptionValueSet( OUTPUT_FORMAT_OPTION ) ) {
+                    final String output_format_str = cla.getOptionValue( OUTPUT_FORMAT_OPTION );
+                    if ( output_format_str.equals( OUTPUT_FORMAT_FASTA )
+                            || output_format_str.equalsIgnoreCase( OUTPUT_FORMAT_FASTA_L ) ) {
+                        output_format = MSA_FORMAT.FASTA;
+                    }
+                    else if ( output_format_str.equals( OUTPUT_FORMAT_PHYLIP )
+                            || output_format_str.equalsIgnoreCase( OUTPUT_FORMAT_PHYLIP_L ) ) {
+                        output_format = MSA_FORMAT.PHYLIP;
+                    }
+                    else if ( output_format_str.equals( OUTPUT_FORMAT_NEXUS )
+                            || output_format_str.equalsIgnoreCase( OUTPUT_FORMAT_NEXUS_L ) ) {
+                        output_format = MSA_FORMAT.NEXUS;
+                    }
+                    else {
+                        ForesterUtil.fatalError( PRG_NAME, "unknown format option: " + output_format_str );
+                    }
+                }
+            }
+            // Giving a third name (the map file) implies identifier normalization.
+            final boolean normalize_identifiers = cla.isOptionSet( ID_NORM_OPTION )
+                    || ( cla.getNumberOfNames() == 3 );
+            if ( normalize_identifiers && ForesterUtil.isEmpty( output_map_file_str ) ) {
+                ForesterUtil.fatalError( PRG_NAME, "need to indicate name for output map file" );
+            }
+            final File output_map_file;
+            if ( normalize_identifiers ) {
+                output_map_file = new File( output_map_file_str );
+                final String error = ForesterUtil.isWritableFile( output_map_file );
+                if ( !ForesterUtil.isEmpty( error ) ) {
+                    ForesterUtil.fatalError( PRG_NAME, error );
+                }
+            }
+            else {
+                output_map_file = null;
+            }
+            if ( cla.getNumberOfNames() == 1 ) {
+                if ( normalize_identifiers ) {
+                    if ( output_format == MSA_FORMAT.FASTA ) {
+                        output_seqs_file_str = input_seqs_name_wo_suffix
+                                + ForesterConstants.ID_NORMALIZED_FASTA_FILE_SUFFIX;
+                    }
+                    else if ( output_format == MSA_FORMAT.NEXUS ) {
+                        output_seqs_file_str = input_seqs_name_wo_suffix
+                                + ForesterConstants.ID_NORMALIZED_NEXUS_FILE_SUFFIX;
+                    }
+                    else if ( output_format == MSA_FORMAT.PHYLIP ) {
+                        output_seqs_file_str = input_seqs_name_wo_suffix
+                                + ForesterConstants.ID_NORMALIZED_PHYLIP_FILE_SUFFIX;
+                    }
+                }
+                else {
+                    // If the default output name is reported as not writable (non-null result),
+                    // fall back to a name with an underscore in front of the suffix.
+                    // NOTE(review): presumably this avoids clobbering an existing file such as
+                    // the input itself -- confirm against ForesterUtil.isWritableFile.
+                    if ( output_format == MSA_FORMAT.FASTA ) {
+                        output_seqs_file_str = input_seqs_name_wo_suffix + ForesterConstants.FASTA_FILE_SUFFIX;
+                        if ( ForesterUtil.isWritableFile( output_seqs_file_str ) != null ) {
+                            output_seqs_file_str = input_seqs_name_wo_suffix + "_"
+                                    + ForesterConstants.FASTA_FILE_SUFFIX;
+                        }
+                    }
+                    else if ( output_format == MSA_FORMAT.NEXUS ) {
+                        output_seqs_file_str = input_seqs_name_wo_suffix + ForesterConstants.NEXUS_FILE_SUFFIX;
+                        if ( ForesterUtil.isWritableFile( output_seqs_file_str ) != null ) {
+                            output_seqs_file_str = input_seqs_name_wo_suffix + "_"
+                                    + ForesterConstants.NEXUS_FILE_SUFFIX;
+                        }
+                    }
+                    else if ( output_format == MSA_FORMAT.PHYLIP ) {
+                        output_seqs_file_str = input_seqs_name_wo_suffix + ForesterConstants.PHYLIP_FILE_SUFFIX;
+                        if ( ForesterUtil.isWritableFile( output_seqs_file_str ) != null ) {
+                            output_seqs_file_str = input_seqs_name_wo_suffix + "_"
+                                    + ForesterConstants.PHYLIP_FILE_SUFFIX;
+                        }
+                    }
+                }
+            }
+            final File outfile_seqs_file = new File( output_seqs_file_str );
+            final String error1 = ForesterUtil.isWritableFile( outfile_seqs_file );
+            if ( !ForesterUtil.isEmpty( error1 ) ) {
+                ForesterUtil.fatalError( PRG_NAME, error1 );
+            }
+            System.out.println();
+            if ( input_seqs_fasta_like ) {
+                System.out.println( "Input format : Fasta" );
+            }
+            else {
+                System.out.println( "Input format : Phylip like" );
+            }
+            System.out.println( "Input file : " + input_seqs_file_str );
+            if ( output_format == MSA_FORMAT.FASTA ) {
+                System.out.println( "Output format : Fasta" );
+            }
+            else if ( output_format == MSA_FORMAT.NEXUS ) {
+                System.out.println( "Output format : Nexus" );
+            }
+            else if ( output_format == MSA_FORMAT.PHYLIP ) {
+                System.out.println( "Output format : Phylip" );
+            }
+            System.out.println( "Output file : " + output_seqs_file_str );
+            System.out.println( "Shorten names : " + normalize_identifiers );
+            if ( normalize_identifiers ) {
+                System.out.println( "Identifier map : " + output_map_file_str );
+            }
+            final List<MolecularSequence> input_seqs;
+            // try-with-resources makes sure the input stream is closed once parsing is done.
+            try ( final FileInputStream is = new FileInputStream( input_seqs_file ) ) {
+                if ( FastaParser.isLikelyFasta( input_seqs_file ) ) {
+                    input_seqs = FastaParser.parse( is );
+                }
+                else {
+                    input_seqs = GeneralMsaParser.parseSeqs( is );
+                }
+            }
+            if ( input_seqs == null ) {
+                ForesterUtil.fatalError( PRG_NAME, "failed to read input sequences" );
+            }
+            if ( input_seqs.size() < 1 ) {
+                ForesterUtil.fatalError( PRG_NAME, "input seems to be devoid of sequences" );
+            }
+            final BasicDescriptiveStatistics stats = new BasicDescriptiveStatistics();
+            for( final MolecularSequence seq : input_seqs ) {
+                stats.addValue( seq.getLength() );
+            }
+            System.out.println( "Number of sequences : " + input_seqs.size() );
+            if ( !ForesterUtil.isEqual( stats.getMin(), stats.getMax() ) ) {
+                // Sequences differ in length, i.e. the input is not an alignment.
+                System.out.println( "Sequence lenght min : " + ( int ) stats.getMin() );
+                System.out.println( "Sequence lenght max : " + ( int ) stats.getMax() );
+                if ( input_seqs.size() > 2 ) {
+                    System.out.println( "Sequence lenght median: " + ( int ) stats.median() );
+                }
+                if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) {
+                    ForesterUtil.fatalError( PRG_NAME,
+                                             "Input is not an alignment, cannot write in Nexus or Phylip format" );
+                }
+            }
+            else {
+                System.out.println( "Alignment length : " + ( int ) stats.getMax() );
+            }
+            final List<MolecularSequence> output_seqs = new ArrayList<>();
+            int counter = 0;
+            final BufferedWriter output_map_writer;
+            if ( normalize_identifiers ) {
+                output_map_writer = ForesterUtil.createBufferedWriter( output_map_file_str );
+            }
+            else {
+                output_map_writer = null;
+            }
+            for( final MolecularSequence seq : input_seqs ) {
+                final String new_name;
+                if ( normalize_identifiers ) {
+                    new_name = modify_name( seq.getIdentifier(), counter++, output_map_writer );
+                }
+                else {
+                    new_name = seq.getIdentifier();
+                }
+                final MolecularSequence ns = BasicSequence.createGeneralSequence( new_name,
+                                                                                  seq.getMolecularSequenceAsString() );
+                output_seqs.add( ns );
+            }
+            if ( normalize_identifiers ) {
+                output_map_writer.flush();
+                output_map_writer.close();
+                System.out.println();
+                System.out.println( "Wrote : " + output_map_file );
+            }
+            final BufferedWriter seq_writer = ForesterUtil.createBufferedWriter( outfile_seqs_file );
+            if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) {
+                final Msa m = BasicMsa.createInstance( output_seqs );
+                m.write( seq_writer, output_format );
+            }
+            else if ( output_format == MSA_FORMAT.FASTA ) {
+                SequenceWriter.writeSeqs( output_seqs, seq_writer, SEQ_FORMAT.FASTA, 60 );
+            }
+            seq_writer.flush();
+            seq_writer.close();
+            System.out.println( "Wrote : " + outfile_seqs_file );
+            System.out.println();
+        }
+        catch ( final IllegalArgumentException e ) {
+            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
+        }
+    }
+
+    /**
+     * Removes a known sequence file suffix (see KNOWN_INPUT_SUFFIXES) from the end of a file
+     * name. The comparison ignores case; the number of characters removed is always the
+     * length of the matched suffix.
+     *
+     * @param file_name the input file name
+     * @return the file name without its known suffix, or the unchanged file name if it does
+     *         not end with any known suffix
+     */
+    private static String strip_known_suffix( final String file_name ) {
+        for( final String suffix : KNOWN_INPUT_SUFFIXES ) {
+            final int stem_length = file_name.length() - suffix.length();
+            // regionMatches returns false for a negative offset, i.e. when the name is
+            // shorter than the suffix.
+            if ( file_name.regionMatches( true, stem_length, suffix, 0, suffix.length() ) ) {
+                return file_name.substring( 0, stem_length );
+            }
+        }
+        return file_name;
+    }
+
+    /**
+     * Produces the short replacement identifier for one sequence and records the mapping.
+     * <p>
+     * The new identifier is the counter rendered as a hexadecimal string. One line
+     * "new identifier TAB original description" is written to the writer; runs of whitespace
+     * in the original description are collapsed to a single space first, so that the
+     * description cannot introduce additional tabs or line breaks into the map.
+     *
+     * @param desc the original sequence name/description
+     * @param counter the running number of the sequence
+     * @param writer the writer for the identifier map
+     * @return the new identifier
+     * @throws IOException if writing to the map fails
+     */
+    final static String modify_name( final String desc, final int counter, final Writer writer ) throws IOException {
+        // Strings are immutable: the result of replaceAll has to be kept and used.
+        final String clean_desc = desc.replaceAll( "\\s+", " " );
+        final String new_desc = Integer.toHexString( counter );
+        if ( new_desc.length() > 9 ) {
+            ForesterUtil.fatalError( PRG_NAME,
+                                     "shortened identifier [" + new_desc + "] is too long (" + new_desc.length()
+                                             + " characters)" );
+        }
+        writer.write( new_desc + "\t" + clean_desc + "\n" );
+        return new_desc;
+    }
+
+    /**
+     * Prints usage, the available options, and an example invocation to standard output.
+     */
+    private final static void print_help() {
+        System.out.println( "Usage:" );
+        System.out.println();
+        System.out.println( PRG_NAME + " [options] <input sequences file> [output sequences file] [output map file]" );
+        System.out.println();
+        System.out.println( " options:" );
+        System.out.println( " -" + OUTPUT_FORMAT_OPTION + "=<format>: output format: " + OUTPUT_FORMAT_FASTA_L + " or "
+                + OUTPUT_FORMAT_FASTA + " for Fasta (default), " + OUTPUT_FORMAT_PHYLIP_L + " or "
+                + OUTPUT_FORMAT_PHYLIP + " for Phylip, " + OUTPUT_FORMAT_NEXUS_L + " or " + OUTPUT_FORMAT_NEXUS
+                + " for Nexus" );
+        System.out.println( " -" + ID_NORM_OPTION + ": to replace sequence names with short(er) identifiers" );
+        System.out.println();
+        System.out.println( "Example:" );
+        System.out.println();
+        System.out.println( " " + PRG_NAME + " -i -o=p my_seqs.fasta" );
+        System.out.println();
+    }
+}
+++ /dev/null
-
-package org.forester.application;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.Writer;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.forester.io.parsers.FastaParser;
-import org.forester.io.parsers.GeneralMsaParser;
-import org.forester.io.writers.SequenceWriter;
-import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
-import org.forester.msa.BasicMsa;
-import org.forester.msa.Msa;
-import org.forester.msa.Msa.MSA_FORMAT;
-import org.forester.sequence.BasicSequence;
-import org.forester.sequence.MolecularSequence;
-import org.forester.util.CommandLineArguments;
-import org.forester.util.ForesterConstants;
-import org.forester.util.ForesterUtil;
-
-public class tap {
-
- final static private String PRG_NAME = "tap";
- final static private String PRG_DATE = "170327";
- final static private String PRG_DESC = "Replacement of labels in multiple sequence files";
- final static private String PRG_VERSION = "1.00";
- final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
- final static private String E_MAIL = "phyloxml@gmail.com";
- final static private String EXTRACT_TAXONOMY_OPTION = "t";
- final static private String ANNOTATION_OPTION = "a";
- final static private String HELP_OPTION_1 = "help";
- final static private String HELP_OPTION_2 = "h";
-
- public static void main( final String args[] ) {
- try {
- ForesterUtil.printProgramInformation( PRG_NAME,
- PRG_DESC,
- PRG_VERSION,
- PRG_DATE,
- E_MAIL,
- WWW,
- ForesterUtil.getForesterLibraryInformation() );
- CommandLineArguments cla = null;
- try {
- cla = new CommandLineArguments( args );
- }
- catch ( final Exception e ) {
- ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
- }
- if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) {
- System.out.println();
- print_help();
- System.exit( 0 );
- }
- String input = null;
- String output = null;
- String list_file = null;
- String i = null;
- if ( args.length == 3 ) {
- input = cla.getName( 0 );
- output = cla.getName( 1 );
- list_file = cla.getName( 2 );
- }
- else if ( args.length == 1 ) {
- input = cla.getName( 0 );
- i = null;
- if ( input.toLowerCase().endsWith( ".fasta" ) ) {
- i = input.substring( 0, input.length() - 7 );
- }
- else if ( input.toLowerCase().endsWith( ".fsa" ) ) {
- i = input.substring( 0, input.length() - 5 );
- }
- else {
- i = input;
- }
- output = i + ForesterConstants.ID_NORMALIZED_FASTA_FILE_SUFFIX;
- list_file = i + ForesterConstants.ID_MAP_FILE_SUFFIX;
- }
- else {
- print_help();
- System.exit( -1 );
- }
- final List<String> allowed_options = new ArrayList<>();
- allowed_options.add( ANNOTATION_OPTION );
- final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
- if ( dissallowed_options.length() > 0 ) {
- ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
- }
- final File outfile_file = new File( output );
- final File listfile = new File( list_file );
- final File input_file = new File( input );
- final String error1 = ForesterUtil.isWritableFile( outfile_file );
- if ( !ForesterUtil.isEmpty( error1 ) ) {
- ForesterUtil.fatalError( PRG_NAME, error1 );
- }
- final String error2 = ForesterUtil.isWritableFile( listfile );
- if ( !ForesterUtil.isEmpty( error2 ) ) {
- ForesterUtil.fatalError( PRG_NAME, error2 );
- }
- final String error3 = ForesterUtil.isReadableFile( input_file );
- if ( !ForesterUtil.isEmpty( error3 ) ) {
- ForesterUtil.fatalError( PRG_NAME, error3 );
- }
- final boolean fasta_like = ForesterUtil.isLooksLikeFasta( input_file );
- final Msa.MSA_FORMAT output_format = MSA_FORMAT.FASTA;
- System.out.println();
- System.out.println( "Input alignment : " + input );
- System.out.println( "Output alignment : " + output );
- System.out.println( "Name list : " + list_file );
- if ( fasta_like ) {
- System.out.println( "Input format : Fasta" );
- }
- else {
- System.out.println( "Input format : Phylip like" );
- }
- if ( output_format == MSA_FORMAT.FASTA ) {
- System.out.println( "Output format : Fasta" );
- }
- else if ( output_format == MSA_FORMAT.NEXUS ) {
- System.out.println( "Output format : Nexus" );
- }
- else if ( output_format == MSA_FORMAT.PHYLIP ) {
- System.out.println( "Output format : Phylip" );
- }
- System.out.println();
-
- final List<MolecularSequence> seqs;
- final FileInputStream is = new FileInputStream( input_file );
- if ( FastaParser.isLikelyFasta( input_file ) ) {
- seqs = FastaParser.parse( is );
- }
- else {
- seqs = GeneralMsaParser.parseSeqs( is );
- }
- if ( seqs == null ) {
- ForesterUtil.fatalError( PRG_NAME, "failed to read MSA" );
- }
- if ( seqs.size() < 1 ) {
- ForesterUtil.fatalError( PRG_NAME, "MSA seems to be devoid of sequences" );
- }
- // TODO print number of seqs
- // TODO print number min length
- // TODO print max length
- // TODO OR
- // TODO print length is aligned
- // TODO if no aligned no phylip or nexus outpt
- //
-
- final List<MolecularSequence> seqs2 = new ArrayList<>();
- int counter = 0;
- final BufferedWriter writer = ForesterUtil.createBufferedWriter( list_file );
- for( final MolecularSequence seq : seqs ) {
- final String new_name = modify_name( seq.getIdentifier(), counter++, writer );
- final MolecularSequence ns = BasicSequence.createSequence( new_name,
- seq.getMolecularSequenceAsString() );
- seqs2.add( ns );
- }
- writer.flush();
- writer.close();
- final BufferedWriter seq_writer = ForesterUtil.createBufferedWriter( outfile_file );
- if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) {
- final Msa m = BasicMsa.createInstance( seqs2 );
- m.write( seq_writer, output_format );
- }
- else if ( output_format == MSA_FORMAT.FASTA ) {
- SequenceWriter.writeSeqs( seqs2, seq_writer, SEQ_FORMAT.FASTA, 60 );
- }
- seq_writer.flush();
- seq_writer.close();
- // Util.print_message( PRG_NAME, "wrote: " + list_file )
- // Util.print_message( PRG_NAME, "wrote: " + output )
- }
- catch ( final IllegalArgumentException e ) {
- ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
- }
- catch ( final Exception e ) {
- e.printStackTrace();
- ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
- }
- }
-
- final static String modify_name( final String desc, final int counter, final Writer writer ) throws IOException {
- desc.replaceAll( "\\s+", " " );
- final String new_desc = Integer.toHexString( counter );
- if ( new_desc.length() > 9 ) {
- ForesterUtil.fatalError( PRG_NAME,
- "shortened identifier [" + new_desc + "] is too long (" + new_desc.length()
- + " characters)" );
- }
- writer.write( new_desc + "\t" + desc + "\n" );
- return new_desc;
- }
-
- private final static void print_help() {
- System.out.println( "Usage:" );
- System.out.println();
- System.out.println( PRG_NAME + " [options] <gene tree file> <query>" );
- System.out.println();
- System.out.println( " options:" );
- //System.out.println( " -" + SEP_OPTION + "=<separator>: the separator to be used" );
- System.out.println();
- System.out.println( "Example:" );
- System.out.println();
- System.out.println( " " + PRG_NAME + " -s=. my_tree.xml A.1.1.1" );
- System.out.println();
- }
-}
import java.math.BigDecimal;
import java.text.DecimalFormat;
-final class Prefix {
+public final class Prefix {
private final static DecimalFormat df = new DecimalFormat( "0.0#####" );
private final String _prefix;
private final String _separator;
private final String _first;
- Prefix( final String prefix, final String confidence, final String separator ) {
+ public Prefix( final String prefix, final String confidence, final String separator ) {
_prefix = prefix;
_confidence = new BigDecimal( confidence );
_separator = separator;
}
}
- Prefix( final String prefix, final double confidence, final String separator ) {
+ public Prefix( final String prefix, final double confidence, final String separator ) {
_prefix = prefix;
_confidence = new BigDecimal( confidence );
_separator = separator;
}
}
- String getPrefix() {
+ public String getPrefix() {
return _prefix;
}
- String getPrefixFirstElement() {
+ public String getPrefixFirstElement() {
return _first;
}
- double getConfidence() {
+ public double getConfidence() {
return _confidence.doubleValue();
}
.replaceAll( re, Character.toString( repl ) ), type );
}
+ /**
+  * Creates a sequence of type GENERAL from an identifier and a raw sequence string.
+  * <p>
+  * Both arguments are first passed to check(...). The sequence string is then upper-cased
+  * and every '.' character in it is replaced by GAP_STR; no other characters are altered
+  * here.
+  *
+  * @param identifier the sequence identifier
+  * @param mol_sequence the raw sequence string
+  * @return the newly created sequence
+  */
+ public static MolecularSequence createGeneralSequence( final String identifier, final String mol_sequence ) {
+     check( identifier, mol_sequence );
+     final String upper_cased = mol_sequence.toUpperCase();
+     final String dots_as_gaps = upper_cased.replaceAll( "\\.", GAP_STR );
+     return new BasicSequence( identifier, dots_as_gaps, TYPE.GENERAL );
+ }
+
public static MolecularSequence createAaSequence( final String identifier, final String mol_sequence ) {
check( identifier, mol_sequence );
return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
public abstract TYPE getType();
public enum TYPE {
- RNA, DNA, AA;
+ RNA, DNA, AA, GENERAL;
}
}
\ No newline at end of file
public final class ForesterConstants {
- public final static String FORESTER_VERSION = "1.045";
- public final static String FORESTER_DATE = "161214";
- public final static String PHYLO_XML_VERSION = "1.20";
- public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org";
- public final static String PHYLO_XML_XSD = "phyloxml.xsd";
- public final static String XML_SCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
- public final static String LOCAL_PHYLOXML_XSD_RESOURCE = "resources/phyloxml.xsd";
- public final static String PHYLO_XML_SUFFIX = ".xml";
- public final static String ID_NORMALIZED_FASTA_FILE_SUFFIX = "_ni.fasta";
- public final static String ID_MAP_FILE_SUFFIX = ".nim";
- public final static String UTF_8 = "UTF-8";
- public final static String ISO_8859_1 = "ISO-8859-1";
- public final static String PHYLO_XML_REFERENCE = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356";
- public final static boolean RELEASE = false;
+ public final static String FORESTER_VERSION = "1.046";
+ public final static String FORESTER_DATE = "170830";
+ public final static String PHYLO_XML_VERSION = "1.20";
+ public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org";
+ public final static String PHYLO_XML_XSD = "phyloxml.xsd";
+ public final static String XML_SCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
+ public final static String LOCAL_PHYLOXML_XSD_RESOURCE = "resources/phyloxml.xsd";
+ public final static String PHYLO_XML_SUFFIX = ".xml";
+ public final static String ID_NORMALIZED_FASTA_FILE_SUFFIX = "_ni.fasta";
+ public final static String ID_NORMALIZED_NEXUS_FILE_SUFFIX = "_ni.nexus";
+ public final static String ID_NORMALIZED_PHYLIP_FILE_SUFFIX = "_ni.phylip";
+ public final static String FASTA_FILE_SUFFIX = ".fasta";
+ public final static String NEXUS_FILE_SUFFIX = ".nexus";
+ public final static String PHYLIP_FILE_SUFFIX = ".phylip";
+ public final static String ID_MAP_FILE_SUFFIX = ".nim";
+ public final static String UTF_8 = "UTF-8";
+ public final static String ISO_8859_1 = "ISO-8859-1";
+ public final static String PHYLO_XML_REFERENCE = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356";
+ public final static boolean RELEASE = false;
public enum PhylogeneticTreeFormats {
NH,
}
return null;
}
+
+ /**
+  * Convenience overload: wraps the given path in a File and delegates to
+  * isWritableFile(File), returning that method's result unchanged.
+  *
+  * @param s the path of the file to check
+  * @return the result of isWritableFile(File) for the given path
+  */
+ final public static String isWritableFile( final String s ) {
+     final File file = new File( s );
+     return isWritableFile( file );
+ }
/**
* Helper for method "stringToColor".