X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Ffasta_split.java;h=f71d0a15d826eb4d2d7fd459b0ee4260dfa1397b;hb=c0439ed8b088887ffea2faf11bc7897333287cb3;hp=5b101109781eb70b688982e36035ba1eee19cb8e;hpb=ace1d935e40a15f7d09d14439468412b03696acb;p=jalview.git diff --git a/forester/java/src/org/forester/application/fasta_split.java b/forester/java/src/org/forester/application/fasta_split.java index 5b10110..f71d0a1 100644 --- a/forester/java/src/org/forester/application/fasta_split.java +++ b/forester/java/src/org/forester/application/fasta_split.java @@ -22,6 +22,11 @@ // // Contact: phylosoft @ gmail . com // WWW: https://sites.google.com/site/cmzmasek/home/software/forester +// +// +// "java -Xmx1024m -cp path\to\forester.jar org.forester.application.fasta_split +// +// package org.forester.application; @@ -46,7 +51,7 @@ public final class fasta_split { final static private String PRG_NAME = "fasta_split"; final static private String PRG_VERSION = "1.00"; - final static private String PRG_DATE = "150320"; + final static private String PRG_DATE = "170516"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( fasta_split.PRG_NAME, fasta_split.PRG_VERSION, fasta_split.PRG_DATE ); @@ -75,6 +80,9 @@ public final class fasta_split { if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( PRG_NAME, error ); } + if ( !outdir.exists() ) { + new File( outdir.toString() ).mkdir(); + } if ( !outdir.isDirectory() ) { ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" ); } @@ -88,7 +96,9 @@ public final class fasta_split { if ( ( seqs == null ) || seqs.isEmpty() ) { ForesterUtil.fatalError( PRG_NAME, infile + " appears empty" ); } + System.out.println( "Read " + seqs.size() + " sequences" ); final Map> output = new HashMap>(); + for( final MolecularSequence seq : seqs ) { final Matcher m = pa.matcher( seq.getIdentifier() ); if ( m.find() ) { @@ -99,16 +109,24 @@ public final class fasta_split { output.get( key ).add( seq ); } else { - ForesterUtil.fatalError( PRG_NAME, pattern_str + " not found in sequence " + seq.getIdentifier() ); + System.out.println( "warning: " + pattern_str + " not found in sequence \"" + seq.getIdentifier() + + "\"" ); + final String key = "unknown"; + if ( !output.containsKey( key ) ) { + output.put( key, new ArrayList() ); + } + output.get( key ).add( seq ); } } int c = 0; for( final Map.Entry> entry : output.entrySet() ) { - final File of = new File( outdir.getAbsolutePath().toString() + "/" + entry.getKey() + ".fasta" ); + String s = entry.getKey(); + s = s.replace( '*', '_' ); + final File of = new File( outdir.getAbsolutePath().toString() + "/" + s + ".fasta" ); if ( of.exists() ) { ForesterUtil.fatalError( PRG_NAME, of + " already exists" ); } - System.out.println( ++c + ": writing " + of ); + System.out.println( ++c + ": writing " + of + " [" + entry.getValue().size() + " seqs]" ); try { SequenceWriter.writeSeqs( entry.getValue(), of, SEQ_FORMAT.FASTA, 60 ); } @@ -121,6 +139,10 @@ public final class fasta_split { private static void argumentsError() { System.out.println( PRG_NAME + " " ); System.out.println(); + System.out.println( "Examples: " ); + System.out.println( " " + PRG_NAME + " \"v-germ=(\\S+)\" tt.fasta outdir" ); + System.out.println( " " + PRG_NAME + " \"(\\S+?)\\|\" seqs.fasta outdir" ); + System.out.println(); System.exit( -1 ); } }