in progress
authorcmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 28 Mar 2015 02:09:40 +0000 (02:09 +0000)
committercmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 28 Mar 2015 02:09:40 +0000 (02:09 +0000)
forester/java/src/org/forester/application/fasta_split.java
forester/java/src/org/forester/application/table2fasta.java [new file with mode: 0644]

index 5b10110..544701a 100644 (file)
 //
 // Contact: phylosoft @ gmail . com
 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+//
+//
+// "java -Xmx1024m -cp path\to\forester.jar org.forester.application.fasta_split"
+// 
+//
 
 package org.forester.application;
 
@@ -46,7 +51,7 @@ public final class fasta_split {
 
     final static private String PRG_NAME    = "fasta_split";
     final static private String PRG_VERSION = "1.00";
-    final static private String PRG_DATE    = "150320";
+    final static private String PRG_DATE    = "150325";
 
     public static void main( final String args[] ) {
         ForesterUtil.printProgramInformation( fasta_split.PRG_NAME, fasta_split.PRG_VERSION, fasta_split.PRG_DATE );
@@ -89,7 +94,9 @@ public final class fasta_split {
             ForesterUtil.fatalError( PRG_NAME, infile + " appears empty" );
         }
         final Map<String, List<MolecularSequence>> output = new HashMap<String, List<MolecularSequence>>();
+        int cc = 0;
         for( final MolecularSequence seq : seqs ) {
+            System.out.println( ++cc );
             final Matcher m = pa.matcher( seq.getIdentifier() );
             if ( m.find() ) {
                 final String key = m.group( 1 );
@@ -99,7 +106,13 @@ public final class fasta_split {
                 output.get( key ).add( seq );
             }
             else {
-                ForesterUtil.fatalError( PRG_NAME, pattern_str + " not found in sequence " + seq.getIdentifier() );
+                //ForesterUtil.fatalError( PRG_NAME, pattern_str + " not found in sequence \"" + seq.getIdentifier() + "\"" );
+                System.out.println( "warning: " + pattern_str + " not found in sequence \"" + seq.getIdentifier() + "\"" );
+                final String key = "unknown";
+                if ( !output.containsKey( key ) ) {
+                    output.put( key, new ArrayList<MolecularSequence>() );
+                }
+                output.get( key ).add( seq );
             }
         }
         int c = 0;
diff --git a/forester/java/src/org/forester/application/table2fasta.java b/forester/java/src/org/forester/application/table2fasta.java
new file mode 100644 (file)
index 0000000..3735d2e
--- /dev/null
@@ -0,0 +1,105 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+//
+//
+// "java -Xmx1024m -cp path\to\forester.jar org.forester.application.table2fasta <position> <infile> <outfile>"
+//
+//
+
+package org.forester.application;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.forester.io.writers.SequenceWriter;
+import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.MolecularSequence;
+import org.forester.util.BasicTable;
+import org.forester.util.BasicTableParser;
+import org.forester.util.CommandLineArguments;
+import org.forester.util.ForesterUtil;
+
+public final class table2fasta {
+
+    final static private String PRG_NAME    = "table2fasta";
+    final static private String PRG_VERSION = "1.00";
+    final static private String PRG_DATE    = "150327";
+
+    public static void main( final String args[] ) {
+        ForesterUtil.printProgramInformation( table2fasta.PRG_NAME, table2fasta.PRG_VERSION, table2fasta.PRG_DATE );
+        System.out.println();
+        if ( ( args.length != 3 ) ) {
+            table2fasta.argumentsError();
+        }
+        CommandLineArguments cla = null;
+        try {
+            cla = new CommandLineArguments( args );
+        }
+        catch ( final Exception e ) {
+            ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
+        }
+        final int position = Integer.parseInt( cla.getName( 0 ) );
+        final File intable = cla.getFile( 1 );
+        final File outfile = cla.getFile( 2 );
+        BasicTable<String> t = null;
+        try {
+            t = BasicTableParser.parse( intable, '\t' );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+        final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
+        for( int r = 0; r < t.getNumberOfRows(); ++r ) {
+            String seq = null;
+            final StringBuilder id = new StringBuilder();
+            for( int c = 0; c < t.getNumberOfColumns(); ++c ) {
+                if ( c == position ) {
+                    seq = t.getValue( c, r );
+                }
+                else {
+                    id.append( t.getValue( c, r ) );
+                    id.append( " " );
+                }
+            }
+            final MolecularSequence s = BasicSequence.createDnaSequence( id.toString().trim(), seq );
+            seqs.add( s );
+        }
+        try {
+            SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 6 );
+        }
+        catch ( final IOException e ) {
+            e.printStackTrace();
+        }
+    }
+
+    private static void argumentsError() {
+        System.out.println( PRG_NAME + " <position> <infile> <outfile>" );
+        System.out.println();
+        System.exit( -1 );
+    }
+}