2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
26 package org.forester.application;
29 import java.io.FileInputStream;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.HashMap;
33 import java.util.List;
35 import java.util.regex.Matcher;
36 import java.util.regex.Pattern;
38 import org.forester.io.parsers.FastaParser;
39 import org.forester.io.writers.SequenceWriter;
40 import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
41 import org.forester.sequence.MolecularSequence;
42 import org.forester.util.CommandLineArguments;
43 import org.forester.util.ForesterUtil;
45 public final class fasta_split {
47 final static private String PRG_NAME = "fasta_split";
48 final static private String PRG_VERSION = "1.00";
49 final static private String PRG_DATE = "150320";
51 public static void main( final String args[] ) {
52 ForesterUtil.printProgramInformation( fasta_split.PRG_NAME, fasta_split.PRG_VERSION, fasta_split.PRG_DATE );
54 if ( ( args.length != 3 ) ) {
55 fasta_split.argumentsError();
57 CommandLineArguments cla = null;
59 cla = new CommandLineArguments( args );
61 catch ( final Exception e ) {
62 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
64 final String pattern_str = cla.getName( 0 );
65 final File infile = cla.getFile( 1 );
66 final File outdir = cla.getFile( 2 );
69 pa = Pattern.compile( pattern_str );
71 catch ( final Exception ex ) {
72 ForesterUtil.fatalError( PRG_NAME, ex.getMessage() );
74 final String error = ForesterUtil.isReadableFile( infile );
75 if ( !ForesterUtil.isEmpty( error ) ) {
76 ForesterUtil.fatalError( PRG_NAME, error );
78 if ( !outdir.isDirectory() ) {
79 ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" );
81 List<MolecularSequence> seqs = null;
83 seqs = FastaParser.parse( new FileInputStream( infile ) );
85 catch ( final IOException e ) {
86 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
88 if ( ( seqs == null ) || seqs.isEmpty() ) {
89 ForesterUtil.fatalError( PRG_NAME, infile + " appears empty" );
91 final Map<String, List<MolecularSequence>> output = new HashMap<String, List<MolecularSequence>>();
92 for( final MolecularSequence seq : seqs ) {
93 final Matcher m = pa.matcher( seq.getIdentifier() );
95 final String key = m.group( 1 );
96 if ( !output.containsKey( key ) ) {
97 output.put( key, new ArrayList<MolecularSequence>() );
99 output.get( key ).add( seq );
102 ForesterUtil.fatalError( PRG_NAME, pattern_str + " not found in sequence " + seq.getIdentifier() );
106 for( final Map.Entry<String, List<MolecularSequence>> entry : output.entrySet() ) {
107 final File of = new File( outdir.getAbsolutePath().toString() + "/" + entry.getKey() + ".fasta" );
109 ForesterUtil.fatalError( PRG_NAME, of + " already exists" );
111 System.out.println( ++c + ": writing " + of );
113 SequenceWriter.writeSeqs( entry.getValue(), of, SEQ_FORMAT.FASTA, 60 );
115 catch ( final IOException e ) {
116 ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
121 private static void argumentsError() {
122 System.out.println( PRG_NAME + " <pattern> <infile> <outdir>" );
123 System.out.println();