// $Id: // FORESTER -- software libraries and applications // for evolutionary biology research and applications. // // Copyright (C) 2008-2009 Christian M. Zmasek // Copyright (C) 2008-2009 Burnham Institute for Medical Research // All rights reserved // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . 
// com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester

package org.forester.application;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.forester.io.parsers.FastaParser;
import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.sequence.BasicSequence;
import org.forester.sequence.MolecularSequence;
import org.forester.util.CommandLineArguments;
import org.forester.util.ForesterUtil;

/**
 * Command line tool that scans a directory for FASTA files (names ending in
 * ".fasta" or ".fas", case-insensitive) and, for every file that contains
 * repeated sequence identifiers, writes a copy with the repeated identifiers
 * made unique into an output directory.
 * <p>
 * Files without repeated identifiers produce no output file, and an output
 * file that already exists is never overwritten.
 */
public final class check_fasta {

    final static private String PRG_NAME    = "check_fasta";
    final static private String PRG_VERSION = "1.00";
    final static private String PRG_DATE    = "131202";

    /**
     * Program entry point.
     *
     * @param args exactly two arguments: the input directory and the output
     *             directory (both must already exist)
     */
    public static void main( final String args[] ) {
        ForesterUtil.printProgramInformation( check_fasta.PRG_NAME, check_fasta.PRG_VERSION, check_fasta.PRG_DATE );
        System.out.println();
        if ( ( args.length != 2 ) ) {
            check_fasta.argumentsError();
        }
        CommandLineArguments cla = null;
        try {
            cla = new CommandLineArguments( args );
        }
        catch ( final Exception e ) {
            // NOTE(review): fatalError is assumed to terminate the JVM; otherwise
            // cla would still be null below -- confirm against ForesterUtil.
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
        final File indir = cla.getFile( 0 );
        final File outdir = cla.getFile( 1 );
        if ( !indir.isDirectory() ) {
            ForesterUtil.fatalError( PRG_NAME, indir + " is not a directory" );
        }
        if ( !outdir.isDirectory() ) {
            ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" );
        }
        final File[] list_of_files = indir.listFiles();
        if ( list_of_files == null ) {
            // listFiles() returns null (rather than throwing) when the directory
            // cannot be read.
            ForesterUtil.fatalError( PRG_NAME, "could not list files in " + indir );
            return;
        }
        final List<File> infiles = new ArrayList<File>();
        for( final File file : list_of_files ) {
            final String lower_case_name = file.toString().toLowerCase();
            if ( file.isFile() && file.canRead()
                    && ( lower_case_name.endsWith( ".fasta" ) || lower_case_name.endsWith( ".fas" ) ) ) {
                infiles.add( file );
            }
        }
        // Process the files in a stable (path-sorted) order.
        Collections.sort( infiles );
        int c = 0;
        for( final File infile : infiles ) {
            System.out.println( ++c + "/" + infiles.size() + ": " + infile );
            execute( outdir, infile );
        }
    }

    /**
     * Checks a single FASTA file for repeated identifiers and, if any are
     * found, writes the sequences (with the renamed identifiers) to a file of
     * the same name inside {@code outdir}.
     * <p>
     * Nothing is written when the target file already exists or when the
     * input contains no repeated identifiers. An {@link IOException} while
     * reading or writing is reported through {@code ForesterUtil.fatalError}.
     *
     * @param outdir directory that receives the corrected file
     * @param infile FASTA file to check
     */
    private static void execute( final File outdir, final File infile ) {
        // Resolve against the absolute output directory so that the reported
        // path is absolute, without hard-coding a path separator.
        final File outfile = new File( outdir.getAbsoluteFile(), infile.getName() );
        if ( outfile.exists() ) {
            System.out.println( outfile + " already exists" );
            return;
        }
        try {
            final List<MolecularSequence> seqs = readSequences( infile );
            final Map<String, Short> names = new HashMap<String, Short>();
            int duplicates = 0;
            for( final MolecularSequence seq : seqs ) {
                if ( procSeq( infile.toString(), names, seq ) ) {
                    ++duplicates;
                }
            }
            // Only files that actually needed a correction are written out.
            if ( duplicates > 0 ) {
                SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 60 );
            }
        }
        catch ( final IOException e ) {
            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
        }
    }

    /**
     * Parses all sequences from a FASTA file, making sure the underlying file
     * stream is closed whether or not parsing succeeds.
     *
     * @param infile FASTA file to read
     * @return the sequences returned by {@code FastaParser.parse}
     * @throws IOException if the file cannot be opened, read, or closed
     */
    private static List<MolecularSequence> readSequences( final File infile ) throws IOException {
        final FileInputStream in = new FileInputStream( infile );
        try {
            return FastaParser.parse( in );
        }
        finally {
            in.close();
        }
    }

    /**
     * Registers the identifier of {@code seq} and renames the sequence if the
     * identifier has been seen before in the same file.
     * <p>
     * The first sequence with a given identifier is left unchanged. The n-th
     * repeat (n = 1, 2, ...) is renamed to {@code <identifier>_<n>} and the
     * rename is reported on standard output.
     *
     * @param infile name of the file being processed (used for reporting only)
     * @param names  occurrence counter per original identifier; updated by
     *               this method
     * @param seq    sequence to check; its identifier is modified in place when
     *               it is a repeat (the sequence is cast to
     *               {@code BasicSequence} for this)
     * @return {@code true} if the identifier was a repeat and the sequence was
     *         renamed, {@code false} otherwise
     */
    private static boolean procSeq( final String infile,
                                    final Map<String, Short> names,
                                    final MolecularSequence seq ) {
        boolean duplicate = false;
        final String name = seq.getIdentifier();
        if ( !names.containsKey( name ) ) {
            names.put( name, ( short ) 1 );
        }
        else {
            duplicate = true;
            final short i = names.get( name );
            ( ( BasicSequence ) seq ).setIdentifier( name + "_" + i );
            names.put( name, ( short ) ( i + 1 ) );
            System.out.println( " " + infile + " " + i + ": " + seq.getIdentifier() );
        }
        return duplicate;
    }

    /**
     * Prints the expected command line usage and terminates the program with
     * exit status -1.
     */
    private static void argumentsError() {
        System.out.println( PRG_NAME + " <indir> <outdir>" );
        System.out.println();
        System.exit( -1 );
    }
}