// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
// Copyright (C) 2008-2009 Christian M. Zmasek
// Copyright (C) 2008-2009 Burnham Institute for Medical Research
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.application;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.forester.io.parsers.FastaParser;
import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.sequence.BasicSequence;
import org.forester.sequence.MolecularSequence;
import org.forester.util.CommandLineArguments;
import org.forester.util.ForesterUtil;

45 public final class check_fasta {
47 final static private String PRG_NAME = "check_fasta";
48 final static private String PRG_VERSION = "1.00";
49 final static private String PRG_DATE = "131202";
51 public static void main( final String args[] ) {
52 ForesterUtil.printProgramInformation( check_fasta.PRG_NAME, check_fasta.PRG_VERSION, check_fasta.PRG_DATE );
54 if ( ( args.length != 2 ) ) {
55 check_fasta.argumentsError();
57 CommandLineArguments cla = null;
59 cla = new CommandLineArguments( args );
61 catch ( final Exception e ) {
62 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
64 final File indir = cla.getFile( 0 );
65 final File outdir = cla.getFile( 1 );
66 if ( !indir.isDirectory() ) {
67 ForesterUtil.fatalError( PRG_NAME, indir + " is not a directory" );
69 if ( !outdir.isDirectory() ) {
70 ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" );
72 final File[] list_of_files = indir.listFiles();
73 final List<File> infiles = new ArrayList<File>();
74 for( final File file : list_of_files ) {
77 && ( file.toString().toLowerCase().endsWith( ".fasta" ) || file.toString().toLowerCase()
78 .endsWith( ".fas" ) ) ) {
82 Collections.sort( infiles );
84 for( final File infile : infiles ) {
85 System.out.println( ++c + "/" + infiles.size() + ": " + infile );
86 execute( outdir, infile );
90 private static void execute( final File outdir, final File infile ) {
91 final File outfile = new File( outdir.getAbsolutePath().toString() + "/" + infile.getName() );
92 if ( outfile.exists() ) {
93 System.out.println( outfile + " already exists" );
97 final List<MolecularSequence> seqs = FastaParser.parse( new FileInputStream( infile ) );
98 final Map<String, Short> names = new HashMap<String, Short>();
100 for( final MolecularSequence seq : seqs ) {
101 if ( procSeq( infile.toString(), names, seq ) ) {
105 if ( duplicates > 0 ) {
106 SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 60 );
109 catch ( final IOException e ) {
110 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
115 private static boolean procSeq( final String infile, final Map<String, Short> names, final MolecularSequence seq ) {
116 boolean duplicate = false;
117 final String name = seq.getIdentifier();
118 if ( !names.containsKey( name ) ) {
119 names.put( name, ( short ) 1 );
123 final short i = names.get( name );
124 ( ( BasicSequence ) seq ).setIdentifier( name + "_" + i );
125 names.put( name, ( short ) ( i + 1 ) );
126 System.out.println( " " + infile + " " + i + ": " + seq.getIdentifier() );
131 private static void argumentsError() {
132 System.out.println( PRG_NAME + " <indir> <outdir>" );
133 System.out.println();