in progress...
author cmzmasek <chris.zma@outlook.com>
Wed, 30 Aug 2017 17:49:33 +0000 (10:49 -0700)
committer cmzmasek <chris.zma@outlook.com>
Wed, 30 Aug 2017 17:49:33 +0000 (10:49 -0700)
forester/java/src/org/forester/application/tap.java [new file with mode: 0644]

diff --git a/forester/java/src/org/forester/application/tap.java b/forester/java/src/org/forester/application/tap.java
new file mode 100644 (file)
index 0000000..2fcf2cc
--- /dev/null
@@ -0,0 +1,211 @@
+
+package org.forester.application;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.parsers.FastaParser;
+import org.forester.io.parsers.GeneralMsaParser;
+import org.forester.io.writers.SequenceWriter;
+import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
+import org.forester.msa.BasicMsa;
+import org.forester.msa.Msa;
+import org.forester.msa.Msa.MSA_FORMAT;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.MolecularSequence;
+import org.forester.util.CommandLineArguments;
+import org.forester.util.ForesterConstants;
+import org.forester.util.ForesterUtil;
+
+public class tap {
+
+    final static private String PRG_NAME                = "tap";
+    final static private String PRG_DATE                = "170327";
+    final static private String PRG_DESC                = "Replacement of labels in multiple sequence files";
+    final static private String PRG_VERSION             = "1.00";
+    final static private String WWW                     = "https://sites.google.com/site/cmzmasek/home/software/forester";
+    final static private String E_MAIL                  = "phyloxml@gmail.com";
+    final static private String EXTRACT_TAXONOMY_OPTION = "t";
+    final static private String ANNOTATION_OPTION       = "a";
+    final static private String HELP_OPTION_1           = "help";
+    final static private String HELP_OPTION_2           = "h";
+
+    public static void main( final String args[] ) {
+        try {
+            ForesterUtil.printProgramInformation( PRG_NAME,
+                                                  PRG_DESC,
+                                                  PRG_VERSION,
+                                                  PRG_DATE,
+                                                  E_MAIL,
+                                                  WWW,
+                                                  ForesterUtil.getForesterLibraryInformation() );
+            CommandLineArguments cla = null;
+            try {
+                cla = new CommandLineArguments( args );
+            }
+            catch ( final Exception e ) {
+                ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+            }
+            if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) {
+                System.out.println();
+                print_help();
+                System.exit( 0 );
+            }
+            String input = null;
+            String output = null;
+            String list_file = null;
+            String i = null;
+            if ( args.length == 3 ) {
+                input = cla.getName( 0 );
+                output = cla.getName( 1 );
+                list_file = cla.getName( 2 );
+            }
+            else if ( args.length == 1 ) {
+                input = cla.getName( 0 );
+                i = null;
+                if ( input.toLowerCase().endsWith( ".fasta" ) ) {
+                    i = input.substring( 0, input.length() - 7 );
+                }
+                else if ( input.toLowerCase().endsWith( ".fsa" ) ) {
+                    i = input.substring( 0, input.length() - 5 );
+                }
+                else {
+                    i = input;
+                }
+                output = i + ForesterConstants.ID_NORMALIZED_FASTA_FILE_SUFFIX;
+                list_file = i + ForesterConstants.ID_MAP_FILE_SUFFIX;
+            }
+            else {
+                print_help();
+                System.exit( -1 );
+            }
+            final List<String> allowed_options = new ArrayList<>();
+            allowed_options.add( ANNOTATION_OPTION );
+            final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
+            if ( dissallowed_options.length() > 0 ) {
+                ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
+            }
+            final File outfile_file = new File( output );
+            final File listfile = new File( list_file );
+            final File input_file = new File( input );
+            final String error1 = ForesterUtil.isWritableFile( outfile_file );
+            if ( !ForesterUtil.isEmpty( error1 ) ) {
+                ForesterUtil.fatalError( PRG_NAME, error1 );
+            }
+            final String error2 = ForesterUtil.isWritableFile( listfile );
+            if ( !ForesterUtil.isEmpty( error2 ) ) {
+                ForesterUtil.fatalError( PRG_NAME, error2 );
+            }
+            final String error3 = ForesterUtil.isReadableFile( input_file );
+            if ( !ForesterUtil.isEmpty( error3 ) ) {
+                ForesterUtil.fatalError( PRG_NAME, error3 );
+            }
+            final boolean fasta_like = ForesterUtil.isLooksLikeFasta( input_file );
+            final Msa.MSA_FORMAT output_format = MSA_FORMAT.FASTA;
+            System.out.println();
+            System.out.println( "Input alignment       : " + input );
+            System.out.println( "Output alignment      : " + output );
+            System.out.println( "Name list             : " + list_file );
+            if ( fasta_like ) {
+                System.out.println( "Input format          : Fasta" );
+            }
+            else {
+                System.out.println( "Input format          : Phylip like" );
+            }
+            if ( output_format == MSA_FORMAT.FASTA ) {
+                System.out.println( "Output format         : Fasta" );
+            }
+            else if ( output_format == MSA_FORMAT.NEXUS ) {
+                System.out.println( "Output format         : Nexus" );
+            }
+            else if ( output_format == MSA_FORMAT.PHYLIP ) {
+                System.out.println( "Output format         : Phylip" );
+            }
+            System.out.println();
+            
+            final List<MolecularSequence> seqs;
+            final FileInputStream is = new FileInputStream( input_file );
+            if ( FastaParser.isLikelyFasta( input_file ) ) {
+                seqs = FastaParser.parse( is );
+            }
+            else {
+                seqs = GeneralMsaParser.parseSeqs( is );
+            }
+            if ( seqs == null ) {
+                ForesterUtil.fatalError( PRG_NAME, "failed to read MSA" );
+            }
+            if ( seqs.size() < 1 ) {
+                ForesterUtil.fatalError( PRG_NAME, "MSA seems to be devoid of sequences" );
+            }
+           // TODO print number of seqs
+           // TODO print number min length
+           // TODO print max length
+           // TODO OR
+          //  TODO print length is aligned
+          //  TODO if no aligned no phylip or nexus outpt
+            //
+           
+            final List<MolecularSequence> seqs2 = new ArrayList<>();
+            int counter = 0;
+            final BufferedWriter writer = ForesterUtil.createBufferedWriter( list_file );
+            for( final MolecularSequence seq : seqs ) {
+                final String new_name = modify_name( seq.getIdentifier(), counter++, writer );
+                final MolecularSequence ns = BasicSequence.createSequence( new_name,
+                                                                           seq.getMolecularSequenceAsString() );
+                seqs2.add( ns );
+            }
+            writer.flush();
+            writer.close();
+            final BufferedWriter seq_writer = ForesterUtil.createBufferedWriter( outfile_file );
+            if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) {
+                final Msa m = BasicMsa.createInstance( seqs2 );
+                m.write( seq_writer, output_format );
+            }
+            else if ( output_format == MSA_FORMAT.FASTA ) {
+                SequenceWriter.writeSeqs( seqs2, seq_writer, SEQ_FORMAT.FASTA, 60 );
+            }
+            seq_writer.flush();
+            seq_writer.close();
+            //                    Util.print_message( PRG_NAME, "wrote: " + list_file )
+            //                    Util.print_message( PRG_NAME, "wrote: " + output )
+        }
+        catch ( final IllegalArgumentException e ) {
+            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
+        }
+    }
+
+    final static String modify_name( final String desc, final int counter, final Writer writer ) throws IOException {
+        desc.replaceAll( "\\s+", " " );
+        final String new_desc = Integer.toHexString( counter );
+        if ( new_desc.length() > 9 ) {
+            ForesterUtil.fatalError( PRG_NAME,
+                                     "shortened identifier [" + new_desc + "] is too long (" + new_desc.length()
+                                             + " characters)" );
+        }
+        writer.write( new_desc + "\t" + desc + "\n" );
+        return new_desc;
+    }
+
+    private final static void print_help() {
+        System.out.println( "Usage:" );
+        System.out.println();
+        System.out.println( PRG_NAME + " [options] <gene tree file> <query>" );
+        System.out.println();
+        System.out.println( " options:" );
+        //System.out.println( "  -" + SEP_OPTION + "=<separator>: the separator to be used" );
+        System.out.println();
+        System.out.println( "Example:" );
+        System.out.println();
+        System.out.println( " " + PRG_NAME + " -s=. my_tree.xml A.1.1.1" );
+        System.out.println();
+    }
+}