2 package org.forester.application;
4 import java.io.BufferedWriter;
6 import java.io.FileInputStream;
7 import java.io.IOException;
9 import java.util.ArrayList;
10 import java.util.List;
12 import org.forester.io.parsers.FastaParser;
13 import org.forester.io.parsers.GeneralMsaParser;
14 import org.forester.io.writers.SequenceWriter;
15 import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
16 import org.forester.msa.BasicMsa;
17 import org.forester.msa.Msa;
18 import org.forester.msa.Msa.MSA_FORMAT;
19 import org.forester.sequence.BasicSequence;
20 import org.forester.sequence.MolecularSequence;
21 import org.forester.util.CommandLineArguments;
22 import org.forester.util.ForesterConstants;
23 import org.forester.util.ForesterUtil;
27 final static private String PRG_NAME = "tap";
28 final static private String PRG_DATE = "170327";
29 final static private String PRG_DESC = "Replacement of labels in multiple sequence files";
30 final static private String PRG_VERSION = "1.00";
31 final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
32 final static private String E_MAIL = "phyloxml@gmail.com";
33 final static private String EXTRACT_TAXONOMY_OPTION = "t";
34 final static private String ANNOTATION_OPTION = "a";
35 final static private String HELP_OPTION_1 = "help";
36 final static private String HELP_OPTION_2 = "h";
38 public static void main( final String args[] ) {
40 ForesterUtil.printProgramInformation( PRG_NAME,
46 ForesterUtil.getForesterLibraryInformation() );
47 CommandLineArguments cla = null;
49 cla = new CommandLineArguments( args );
51 catch ( final Exception e ) {
52 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
54 if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) {
61 String list_file = null;
63 if ( args.length == 3 ) {
64 input = cla.getName( 0 );
65 output = cla.getName( 1 );
66 list_file = cla.getName( 2 );
68 else if ( args.length == 1 ) {
69 input = cla.getName( 0 );
71 if ( input.toLowerCase().endsWith( ".fasta" ) ) {
72 i = input.substring( 0, input.length() - 7 );
74 else if ( input.toLowerCase().endsWith( ".fsa" ) ) {
75 i = input.substring( 0, input.length() - 5 );
80 output = i + ForesterConstants.ID_NORMALIZED_FASTA_FILE_SUFFIX;
81 list_file = i + ForesterConstants.ID_MAP_FILE_SUFFIX;
87 final List<String> allowed_options = new ArrayList<>();
88 allowed_options.add( ANNOTATION_OPTION );
89 final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
90 if ( dissallowed_options.length() > 0 ) {
91 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
93 final File outfile_file = new File( output );
94 final File listfile = new File( list_file );
95 final File input_file = new File( input );
96 final String error1 = ForesterUtil.isWritableFile( outfile_file );
97 if ( !ForesterUtil.isEmpty( error1 ) ) {
98 ForesterUtil.fatalError( PRG_NAME, error1 );
100 final String error2 = ForesterUtil.isWritableFile( listfile );
101 if ( !ForesterUtil.isEmpty( error2 ) ) {
102 ForesterUtil.fatalError( PRG_NAME, error2 );
104 final String error3 = ForesterUtil.isReadableFile( input_file );
105 if ( !ForesterUtil.isEmpty( error3 ) ) {
106 ForesterUtil.fatalError( PRG_NAME, error3 );
108 final boolean fasta_like = ForesterUtil.isLooksLikeFasta( input_file );
109 final Msa.MSA_FORMAT output_format = MSA_FORMAT.FASTA;
110 System.out.println();
111 System.out.println( "Input alignment : " + input );
112 System.out.println( "Output alignment : " + output );
113 System.out.println( "Name list : " + list_file );
115 System.out.println( "Input format : Fasta" );
118 System.out.println( "Input format : Phylip like" );
120 if ( output_format == MSA_FORMAT.FASTA ) {
121 System.out.println( "Output format : Fasta" );
123 else if ( output_format == MSA_FORMAT.NEXUS ) {
124 System.out.println( "Output format : Nexus" );
126 else if ( output_format == MSA_FORMAT.PHYLIP ) {
127 System.out.println( "Output format : Phylip" );
129 System.out.println();
131 final List<MolecularSequence> seqs;
132 final FileInputStream is = new FileInputStream( input_file );
133 if ( FastaParser.isLikelyFasta( input_file ) ) {
134 seqs = FastaParser.parse( is );
137 seqs = GeneralMsaParser.parseSeqs( is );
139 if ( seqs == null ) {
140 ForesterUtil.fatalError( PRG_NAME, "failed to read MSA" );
142 if ( seqs.size() < 1 ) {
143 ForesterUtil.fatalError( PRG_NAME, "MSA seems to be devoid of sequences" );
145 // TODO print number of seqs
146 // TODO print number min length
147 // TODO print max length
149 // TODO print length is aligned
150 // TODO if no aligned no phylip or nexus outpt
153 final List<MolecularSequence> seqs2 = new ArrayList<>();
155 final BufferedWriter writer = ForesterUtil.createBufferedWriter( list_file );
156 for( final MolecularSequence seq : seqs ) {
157 final String new_name = modify_name( seq.getIdentifier(), counter++, writer );
158 final MolecularSequence ns = BasicSequence.createSequence( new_name,
159 seq.getMolecularSequenceAsString() );
164 final BufferedWriter seq_writer = ForesterUtil.createBufferedWriter( outfile_file );
165 if ( ( output_format == MSA_FORMAT.NEXUS ) || ( output_format == MSA_FORMAT.PHYLIP ) ) {
166 final Msa m = BasicMsa.createInstance( seqs2 );
167 m.write( seq_writer, output_format );
169 else if ( output_format == MSA_FORMAT.FASTA ) {
170 SequenceWriter.writeSeqs( seqs2, seq_writer, SEQ_FORMAT.FASTA, 60 );
174 // Util.print_message( PRG_NAME, "wrote: " + list_file )
175 // Util.print_message( PRG_NAME, "wrote: " + output )
177 catch ( final IllegalArgumentException e ) {
178 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
180 catch ( final Exception e ) {
182 ForesterUtil.fatalError( PRG_NAME, "Unexpected errror!" );
186 final static String modify_name( final String desc, final int counter, final Writer writer ) throws IOException {
187 desc.replaceAll( "\\s+", " " );
188 final String new_desc = Integer.toHexString( counter );
189 if ( new_desc.length() > 9 ) {
190 ForesterUtil.fatalError( PRG_NAME,
191 "shortened identifier [" + new_desc + "] is too long (" + new_desc.length()
194 writer.write( new_desc + "\t" + desc + "\n" );
198 private final static void print_help() {
199 System.out.println( "Usage:" );
200 System.out.println();
201 System.out.println( PRG_NAME + " [options] <gene tree file> <query>" );
202 System.out.println();
203 System.out.println( " options:" );
204 //System.out.println( " -" + SEP_OPTION + "=<separator>: the separator to be used" );
205 System.out.println();
206 System.out.println( "Example:" );
207 System.out.println();
208 System.out.println( " " + PRG_NAME + " -s=. my_tree.xml A.1.1.1" );
209 System.out.println();