in progress...
[jalview.git] / forester / java / src / org / forester / application / tap.java
1
2 package org.forester.application;
3
4 import java.io.BufferedWriter;
5 import java.io.File;
6 import java.io.FileInputStream;
7 import java.io.IOException;
8 import java.io.Writer;
9 import java.util.ArrayList;
10 import java.util.List;
11
12 import org.forester.io.parsers.FastaParser;
13 import org.forester.io.parsers.GeneralMsaParser;
14 import org.forester.io.writers.SequenceWriter;
15 import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
16 import org.forester.msa.BasicMsa;
17 import org.forester.msa.Msa;
18 import org.forester.msa.Msa.MSA_FORMAT;
19 import org.forester.sequence.BasicSequence;
20 import org.forester.sequence.MolecularSequence;
21 import org.forester.util.CommandLineArguments;
22 import org.forester.util.ForesterConstants;
23 import org.forester.util.ForesterUtil;
24
25 public class tap {
26
27     final static private String PRG_NAME                = "tap";
28     final static private String PRG_DATE                = "170327";
29     final static private String PRG_DESC                = "Replacement of labels in multiple sequence files";
30     final static private String PRG_VERSION             = "1.00";
31     final static private String WWW                     = "https://sites.google.com/site/cmzmasek/home/software/forester";
32     final static private String E_MAIL                  = "phyloxml@gmail.com";
33     final static private String EXTRACT_TAXONOMY_OPTION = "t";
34     final static private String ANNOTATION_OPTION       = "a";
35     final static private String HELP_OPTION_1           = "help";
36     final static private String HELP_OPTION_2           = "h";
37
38     public static void main( final String args[] ) {
39         try {
40             ForesterUtil.printProgramInformation( PRG_NAME,
41                                                   PRG_DESC,
42                                                   PRG_VERSION,
43                                                   PRG_DATE,
44                                                   E_MAIL,
45                                                   WWW,
46                                                   ForesterUtil.getForesterLibraryInformation() );
47             CommandLineArguments cla = null;
48             try {
49                 cla = new CommandLineArguments( args );
50             }
51             catch ( final Exception e ) {
52                 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
53             }
54             if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) {
55                 System.out.println();
56                 print_help();
57                 System.exit( 0 );
58             }
59             String input = null;
60             String output = null;
61             String list_file = null;
62             String i = null;
63             if ( args.length == 3 ) {
64                 input = cla.getName( 0 );
65                 output = cla.getName( 1 );
66                 list_file = cla.getName( 2 );
67             }
68             else if ( args.length == 1 ) {
69                 input = cla.getName( 0 );
70                 i = null;
71                 if ( input.toLowerCase().endsWith( ".fasta" ) ) {
72                     i = input.substring( 0, input.length() - 7 );
73                 }
74                 else if ( input.toLowerCase().endsWith( ".fsa" ) ) {
75                     i = input.substring( 0, input.length() - 5 );
76                 }
77                 else {
78                     i = input;
79                 }
80                 output = i + ForesterConstants.ID_NORMALIZED_FASTA_FILE_SUFFIX;
81                 list_file = i + ForesterConstants.ID_MAP_FILE_SUFFIX;
82             }
83             else {
84                 print_help();
85                 System.exit( -1 );
86             }
87             final List<String> allowed_options = new ArrayList<>();
88             allowed_options.add( ANNOTATION_OPTION );
89             final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
90             if ( dissallowed_options.length() > 0 ) {
91                 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
92             }
93             final File outfile_file = new File( output );
94             final File listfile = new File( list_file );
95             final File input_file = new File( input );
96             final String error1 = ForesterUtil.isWritableFile( outfile_file );
97             if ( !ForesterUtil.isEmpty( error1 ) ) {
98                 ForesterUtil.fatalError( PRG_NAME, error1 );
99             }
100             final String error2 = ForesterUtil.isWritableFile( listfile );
101             if ( !ForesterUtil.isEmpty( error2 ) ) {
102                 ForesterUtil.fatalError( PRG_NAME, error2 );
103             }
104             final String error3 = ForesterUtil.isReadableFile( input_file );
105             if ( !ForesterUtil.isEmpty( error3 ) ) {
106                 ForesterUtil.fatalError( PRG_NAME, error3 );
107             }
108             final boolean fasta_like = ForesterUtil.isLooksLikeFasta( input_file );
109             final Msa.MSA_FORMAT output_format = MSA_FORMAT.FASTA;
110             System.out.println();
111             System.out.println( "Input alignment       : " + input );
112             System.out.println( "Output alignment      : " + output );
113             System.out.println( "Name list             : " + list_file );
114             if ( fasta_like ) {
115                 System.out.println( "Input format          : Fasta" );
116             }
117             else {
118                 System.out.println( "Input format          : Phylip like" );
119             }
120             if ( output_format == MSA_FORMAT.FASTA ) {
121                 System.out.println( "Output format         : Fasta" );
122             }
123             else if ( output_format == MSA_FORMAT.NEXUS ) {
124                 System.out.println( "Output format         : Nexus" );
125             }
126             else if ( output_format == MSA_FORMAT.PHYLIP ) {
127                 System.out.println( "Output format         : Phylip" );
128             }
129             System.out.println();
130             
131             final List<MolecularSequence> seqs;
132             final FileInputStream is = new FileInputStream( input_file );
133             if ( FastaParser.isLikelyFasta( input_file ) ) {
134                 seqs = FastaParser.parse( is );
135             }
136             else {
137                 seqs = GeneralMsaParser.parseSeqs( is );
138             }
139             if ( seqs == null ) {
140                 ForesterUtil.fatalError( PRG_NAME, "failed to read MSA" );
141             }
142             if ( seqs.size() < 1 ) {
143                 ForesterUtil.fatalError( PRG_NAME, "MSA seems to be devoid of sequences" );
144             }
145            // TODO print number of seqs
146            // TODO print number min length
147            // TODO print max length
148            // TODO OR
149           //  TODO print length is aligned
150           //  TODO if no aligned no phylip or nexus outpt
151             //
152            
153             final List<MolecularSequence> seqs2 = new ArrayList<>();
154             int counter = 0;
155             final BufferedWriter writer = ForesterUtil.createBufferedWriter( list_file );
156             for( final MolecularSequence seq : seqs ) {
157                 final String new_name = modify_name( seq.getIdentifier(), counter++, writer );
158                 final MolecularSequence ns = BasicSequence.createSequence( new_name,
159                                                                            seq.getMolecularSequenceAsString() );
160                 seqs2.add( ns );
161             }
162             writer.flush();
163             writer.close();
164             final BufferedWriter seq_writer = ForesterUtil.createBufferedWriter( outfile_file );
165             if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) {
166                 final Msa m = BasicMsa.createInstance( seqs2 );
167                 m.write( seq_writer, output_format );
168             }
169             else if ( output_format == MSA_FORMAT.FASTA ) {
170                 SequenceWriter.writeSeqs( seqs2, seq_writer, SEQ_FORMAT.FASTA, 60 );
171             }
172             seq_writer.flush();
173             seq_writer.close();
174             //                    Util.print_message( PRG_NAME, "wrote: " + list_file )
175             //                    Util.print_message( PRG_NAME, "wrote: " + output )
176         }
177         catch ( final IllegalArgumentException e ) {
178             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
179         }
180         catch ( final Exception e ) {
181             e.printStackTrace();
182             ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
183         }
184     }
185
186     final static String modify_name( final String desc, final int counter, final Writer writer ) throws IOException {
187         desc.replaceAll( "\\s+", " " );
188         final String new_desc = Integer.toHexString( counter );
189         if ( new_desc.length() > 9 ) {
190             ForesterUtil.fatalError( PRG_NAME,
191                                      "shortened identifier [" + new_desc + "] is too long (" + new_desc.length()
192                                              + " characters)" );
193         }
194         writer.write( new_desc + "\t" + desc + "\n" );
195         return new_desc;
196     }
197
198     private final static void print_help() {
199         System.out.println( "Usage:" );
200         System.out.println();
201         System.out.println( PRG_NAME + " [options] <gene tree file> <query>" );
202         System.out.println();
203         System.out.println( " options:" );
204         //System.out.println( "  -" + SEP_OPTION + "=<separator>: the separator to be used" );
205         System.out.println();
206         System.out.println( "Example:" );
207         System.out.println();
208         System.out.println( " " + PRG_NAME + " -s=. my_tree.xml A.1.1.1" );
209         System.out.println();
210     }
211 }