git://source.jalview.org
/
jalview.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
added -rs option
[jalview.git]
/
forester
/
java
/
src
/
org
/
forester
/
application
/
fasta_split.java
diff --git
a/forester/java/src/org/forester/application/fasta_split.java
b/forester/java/src/org/forester/application/fasta_split.java
index
544701a
..
5dc963c
100644
(file)
--- a/
forester/java/src/org/forester/application/fasta_split.java
+++ b/
forester/java/src/org/forester/application/fasta_split.java
@@
-20,12
+20,9
@@
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
-// Contact: phylosoft @ gmail . com
-// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
-//
//
// "java -Xmx1024m -cp path\to\forester.jar org.forester.application.fasta_split
//
// "java -Xmx1024m -cp path\to\forester.jar org.forester.application.fasta_split
-//
+//
//
package org.forester.application;
//
package org.forester.application;
@@
-50,8
+47,8
@@
import org.forester.util.ForesterUtil;
public final class fasta_split {
final static private String PRG_NAME = "fasta_split";
public final class fasta_split {
final static private String PRG_NAME = "fasta_split";
- final static private String PRG_VERSION = "1.00";
- final static private String PRG_DATE = "150325";
+ final static private String PRG_VERSION = "1.01";
+ final static private String PRG_DATE = "170718";
public static void main( final String args[] ) {
ForesterUtil.printProgramInformation( fasta_split.PRG_NAME, fasta_split.PRG_VERSION, fasta_split.PRG_DATE );
public static void main( final String args[] ) {
ForesterUtil.printProgramInformation( fasta_split.PRG_NAME, fasta_split.PRG_VERSION, fasta_split.PRG_DATE );
@@
-80,6
+77,9
@@
public final class fasta_split {
if ( !ForesterUtil.isEmpty( error ) ) {
ForesterUtil.fatalError( PRG_NAME, error );
}
if ( !ForesterUtil.isEmpty( error ) ) {
ForesterUtil.fatalError( PRG_NAME, error );
}
+ if ( !outdir.exists() ) {
+ new File( outdir.toString() ).mkdir();
+ }
if ( !outdir.isDirectory() ) {
ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" );
}
if ( !outdir.isDirectory() ) {
ForesterUtil.fatalError( PRG_NAME, outdir + " is not a directory" );
}
@@
-93,10
+93,9
@@
public final class fasta_split {
if ( ( seqs == null ) || seqs.isEmpty() ) {
ForesterUtil.fatalError( PRG_NAME, infile + " appears empty" );
}
if ( ( seqs == null ) || seqs.isEmpty() ) {
ForesterUtil.fatalError( PRG_NAME, infile + " appears empty" );
}
+ System.out.println( "Read " + seqs.size() + " sequences" );
final Map<String, List<MolecularSequence>> output = new HashMap<String, List<MolecularSequence>>();
final Map<String, List<MolecularSequence>> output = new HashMap<String, List<MolecularSequence>>();
- int cc = 0;
for( final MolecularSequence seq : seqs ) {
for( final MolecularSequence seq : seqs ) {
- System.out.println( ++cc );
final Matcher m = pa.matcher( seq.getIdentifier() );
if ( m.find() ) {
final String key = m.group( 1 );
final Matcher m = pa.matcher( seq.getIdentifier() );
if ( m.find() ) {
final String key = m.group( 1 );
@@
-106,8
+105,8
@@
public final class fasta_split {
output.get( key ).add( seq );
}
else {
output.get( key ).add( seq );
}
else {
- //ForesterUtil.fatalError( PRG_NAME, pattern_str + " not found in sequence \"" + seq.getIdentifier() + "\"" );
- System.out.println( "warning: " + pattern_str + " not found in sequence \"" + seq.getIdentifier() + "\"" );
+ System.out.println( "warning: " + pattern_str + " not found in sequence \"" + seq.getIdentifier()
+ + "\"" );
final String key = "unknown";
if ( !output.containsKey( key ) ) {
output.put( key, new ArrayList<MolecularSequence>() );
final String key = "unknown";
if ( !output.containsKey( key ) ) {
output.put( key, new ArrayList<MolecularSequence>() );
@@
-116,24
+115,37
@@
public final class fasta_split {
}
}
int c = 0;
}
}
int c = 0;
+ int seqs_written = 0;
for( final Map.Entry<String, List<MolecularSequence>> entry : output.entrySet() ) {
for( final Map.Entry<String, List<MolecularSequence>> entry : output.entrySet() ) {
- final File of = new File( outdir.getAbsolutePath().toString() + "/" + entry.getKey() + ".fasta" );
+ String s = entry.getKey().trim();
+ s = s.replaceAll( "[\\./\\*\\s]+", "_" );
+ s = s.replaceAll( "\\(", "~" );
+ s = s.replaceAll( "\\)", "~" );
+ final File of = new File( outdir.getAbsolutePath().toString() + "/" + s + ".fasta" );
if ( of.exists() ) {
ForesterUtil.fatalError( PRG_NAME, of + " already exists" );
}
if ( of.exists() ) {
ForesterUtil.fatalError( PRG_NAME, of + " already exists" );
}
- System.out.println( ++c + ": writing " + of );
+ System.out.println( ++c + ": writing " + of + " [" + entry.getValue().size() + " seqs]" );
+
try {
SequenceWriter.writeSeqs( entry.getValue(), of, SEQ_FORMAT.FASTA, 60 );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
}
try {
SequenceWriter.writeSeqs( entry.getValue(), of, SEQ_FORMAT.FASTA, 60 );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
}
+ seqs_written += entry.getValue().size();
}
}
+ System.out.println( "Wrote " + seqs_written + " sequences" );
}
private static void argumentsError() {
System.out.println( PRG_NAME + " <pattern> <infile> <outdir>" );
System.out.println();
}
private static void argumentsError() {
System.out.println( PRG_NAME + " <pattern> <infile> <outdir>" );
System.out.println();
+ System.out.println( "Examples: " );
+ System.out.println( " " + PRG_NAME + " \"v-germ=(\\S+)\" tt.fasta outdir" );
+ System.out.println( " " + PRG_NAME + " \"(\\S+?)\\|\" seqs.fasta outdir" );
+ System.out.println( " " + PRG_NAME + " \"OS=(.+?)[A-Z]{2}=\" seqs.fasta outdir" );
+ System.out.println();
System.exit( -1 );
}
}
System.exit( -1 );
}
}