bugfix
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 4 Apr 2013 06:19:52 +0000 (06:19 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Thu, 4 Apr 2013 06:19:52 +0000 (06:19 +0000)
forester/java/src/org/forester/application/rio.java
forester/java/src/org/forester/io/parsers/nhx/NHXParser.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/io/writers/SequenceWriter.java
forester/java/src/org/forester/rio/RIO.java
forester/java/src/org/forester/rio/TestRIO.java
forester/java/src/org/forester/test/Test.java

index 1b9687e..4f90ca0 100644 (file)
@@ -319,13 +319,13 @@ public class rio {
                     final NHXParser nhx = ( NHXParser ) p;
                     nhx.setReplaceUnderscores( false );
                     nhx.setIgnoreQuotes( true );
-                    nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+                    nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
                 }
                 else if ( p instanceof NexusPhylogeniesParser ) {
                     final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
                     nex.setReplaceUnderscores( false );
                     nex.setIgnoreQuotes( true );
-                    nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+                    nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
                 }
                 else {
                     throw new RuntimeException( "unknown parser type: " + p );
index 8846374..0172cce 100644 (file)
@@ -711,6 +711,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse
     }
 
     public static enum TAXONOMY_EXTRACTION {
-        NO, AGRESSIVE, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT;
+        NO, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT;
     }
 }
index 4d74229..02ed252 100644 (file)
@@ -254,8 +254,7 @@ public final class ParserUtils {
                 }
             }
         }
-        if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
-                || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) ) {
+        if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) ) {
             final Matcher m1 = TAXOMONY_CODE_PATTERN_1.matcher( name );
             if ( m1.matches() ) {
                 return name;
@@ -279,6 +278,9 @@ public final class ParserUtils {
     public final static String extractTaxonomyDataFromNodeName( final PhylogenyNode node,
                                                                 final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction )
             throws PhyloXmlDataFormatException {
+        if ( taxonomy_extraction == TAXONOMY_EXTRACTION.NO ) {
+            throw new IllegalArgumentException();
+        }
         final String id = extractUniprotTaxonomyIdFromNodeName( node.getName(), taxonomy_extraction );
         if ( !ForesterUtil.isEmpty( id ) ) {
             if ( !node.getNodeData().isHasTaxonomy() ) {
@@ -301,8 +303,7 @@ public final class ParserUtils {
                     return code;
                 }
             }
-            else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
-                    || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) ) {
+            else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) ) {
                 final String sn = extractScientificNameFromNodeName( node.getName() );
                 if ( !ForesterUtil.isEmpty( sn ) ) {
                     if ( !node.getNodeData().isHasTaxonomy() ) {
@@ -321,7 +322,7 @@ public final class ParserUtils {
     public final static String extractUniprotTaxonomyIdFromNodeName( final String name,
                                                                      final TAXONOMY_EXTRACTION taxonomy_extraction ) {
         if ( ( name.indexOf( "_" ) > 0 )
-                && ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) || ( name.indexOf( "/" ) > 4 ) ) ) {
+                && ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) || ( name.indexOf( "/" ) > 4 ) ) ) {
             final String[] s = name.split( "[_\\s]" );
             if ( s.length > 1 ) {
                 final String str = s[ 1 ];
@@ -345,12 +346,6 @@ public final class ParserUtils {
                 }
             }
         }
-        if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) {
-            final Matcher m1 = TAXOMONY_UNIPROT_ID_PATTERN_1.matcher( name );
-            if ( m1.matches() ) {
-                return name;
-            }
-        }
         return null;
     }
 
index b8e80d2..8e6ef95 100644 (file)
@@ -15,28 +15,7 @@ public class SequenceWriter {
     }
 
     public static StringBuilder toFasta( final Sequence seq, final int width ) {
-        final StringBuilder sb = new StringBuilder();
-        sb.append( ">" );
-        sb.append( seq.getIdentifier().toString() );
-        sb.append( ForesterUtil.LINE_SEPARATOR );
-        if ( ( width < 1 ) || ( width >= seq.getLength() ) ) {
-            sb.append( seq.getMolecularSequence() );
-        }
-        else {
-            final int lines = seq.getLength() / width;
-            final int rest = seq.getLength() - ( lines * width );
-            for( int i = 0; i < lines; ++i ) {
-                sb.append( seq.getMolecularSequence(), i * width, width );
-                if ( i < ( lines - 1 ) ) {
-                    sb.append( ForesterUtil.LINE_SEPARATOR );
-                }
-            }
-            if ( rest > 0 ) {
-                sb.append( ForesterUtil.LINE_SEPARATOR );
-                sb.append( seq.getMolecularSequence(), lines * width, rest );
-            }
-        }
-        return sb;
+        return toFasta( seq.getIdentifier(), seq.getMolecularSequenceAsString(), width );
     }
 
     public static StringBuilder toFasta( final String name, final String mol_seq, final int width ) {
@@ -51,14 +30,14 @@ public class SequenceWriter {
             final int lines = mol_seq.length() / width;
             final int rest = mol_seq.length() - ( lines * width );
             for( int i = 0; i < lines; ++i ) {
-                sb.append( mol_seq, i * width, width );
+                sb.append( mol_seq, i * width, ( i + 1 ) * width );
                 if ( i < ( lines - 1 ) ) {
                     sb.append( ForesterUtil.LINE_SEPARATOR );
                 }
             }
             if ( rest > 0 ) {
                 sb.append( ForesterUtil.LINE_SEPARATOR );
-                sb.append( mol_seq, lines * width, rest );
+                sb.append( mol_seq, lines * width, mol_seq.length() );
             }
         }
         return sb;
index b19d327..45d5633 100644 (file)
@@ -901,13 +901,13 @@ public final class RIO {
             final NHXParser nhx = ( NHXParser ) p;
             nhx.setReplaceUnderscores( false );
             nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+            nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
         }
         else if ( p instanceof NexusPhylogeniesParser ) {
             final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
             nex.setReplaceUnderscores( false );
             nex.setIgnoreQuotes( true );
-            nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+            nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
         }
         return factory.create( gene_trees_file, p );
     }
index 9cacc29..03a72cd 100644 (file)
@@ -48,7 +48,7 @@ public final class TestRIO {
             final NHXParser nhx = new NHXParser();
             nhx.setReplaceUnderscores( false );
             nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
+            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             //
             final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);";
             final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx );
@@ -740,7 +740,7 @@ public final class TestRIO {
             final NHXParser nhx = new NHXParser();
             nhx.setReplaceUnderscores( false );
             nhx.setIgnoreQuotes( true );
-            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
+            nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"
                     + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));"
                     + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);";
index 4ba7758..0a9066a 100644 (file)
@@ -57,6 +57,7 @@ import org.forester.io.parsers.phyloxml.PhyloXmlParser;
 import org.forester.io.parsers.tol.TolParser;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.PhylogenyWriter;
+import org.forester.io.writers.SequenceWriter;
 import org.forester.msa.BasicMsa;
 import org.forester.msa.Mafft;
 import org.forester.msa.Msa;
@@ -173,6 +174,15 @@ public final class Test {
             System.exit( -1 );
         }
         final long start_time = new Date().getTime();
+        System.out.print( "Sequence writer: " );
+        if ( testSequenceWriter() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
         System.out.print( "Sequence id parsing: " );
         if ( testSequenceIdParsing() ) {
             System.out.println( "OK." );
@@ -1145,7 +1155,7 @@ public final class Test {
                     .equals( "MOUSE" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.AGRESSIVE )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "MOUSE" ) ) {
                 return false;
             }
@@ -1156,7 +1166,7 @@ public final class Test {
             if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) != null ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.AGRESSIVE )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "MOUSE" ) ) {
                 return false;
             }
@@ -6235,12 +6245,6 @@ public final class Test {
                 System.out.println( n1.toString() );
                 return false;
             }
-            final PhylogenyNode n2 = PhylogenyNode
-                    .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
-            if ( !n2.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
-                System.out.println( n2.toString() );
-                return false;
-            }
             final PhylogenyNode n2x = PhylogenyNode
                     .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( n2x.getNodeData().isHasTaxonomy() ) {
@@ -6649,7 +6653,7 @@ public final class Test {
                 return false;
             }
             final PhylogenyNode n13 = PhylogenyNode
-                    .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
+                    .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( !n13.getName().equals( "blah_12345/1-2" ) ) {
                 return false;
             }
@@ -6742,6 +6746,31 @@ public final class Test {
             if ( n32.getNodeData().isHasTaxonomy() ) {
                 return false;
             }
+            final PhylogenyNode n40 = PhylogenyNode
+                    .createInstanceFromNhxString( "bcl2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+            if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
+                return false;
+            }
+            final PhylogenyNode n41 = PhylogenyNode
+                    .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+            if ( n41.getNodeData().isHasTaxonomy() ) {
+                return false;
+            }
+            final PhylogenyNode n42 = PhylogenyNode
+                    .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+            if ( n42.getNodeData().isHasTaxonomy() ) {
+                return false;
+            }
+            final PhylogenyNode n43 = PhylogenyNode.createInstanceFromNhxString( "12345",
+                                                                                 NHXParser.TAXONOMY_EXTRACTION.NO );
+            if ( n43.getNodeData().isHasTaxonomy() ) {
+                return false;
+            }
+            final PhylogenyNode n44 = PhylogenyNode
+                    .createInstanceFromNhxString( "12345~1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+            if ( n44.getNodeData().isHasTaxonomy() ) {
+                return false;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
@@ -9552,6 +9581,37 @@ public final class Test {
         return true;
     }
 
+    private static boolean testSequenceWriter() {
+        try {
+            final String n = ForesterUtil.LINE_SEPARATOR;
+            if ( !SequenceWriter.toFasta( "name", "awes", 5 ).toString().equals( ">name" + n + "awes" ) ) {
+                return false;
+            }
+            if ( !SequenceWriter.toFasta( "name", "awes", 4 ).toString().equals( ">name" + n + "awes" ) ) {
+                return false;
+            }
+            if ( !SequenceWriter.toFasta( "name", "awes", 3 ).toString().equals( ">name" + n + "awe" + n + "s" ) ) {
+                return false;
+            }
+            if ( !SequenceWriter.toFasta( "name", "awes", 2 ).toString().equals( ">name" + n + "aw" + n + "es" ) ) {
+                return false;
+            }
+            if ( !SequenceWriter.toFasta( "name", "awes", 1 ).toString()
+                    .equals( ">name" + n + "a" + n + "w" + n + "e" + n + "s" ) ) {
+                return false;
+            }
+            if ( !SequenceWriter.toFasta( "name", "abcdefghij", 3 ).toString()
+                    .equals( ">name" + n + "abc" + n + "def" + n + "ghi" + n + "j" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+            return false;
+        }
+        return true;
+    }
+
     private static boolean testCreateBalancedPhylogeny() {
         try {
             final Phylogeny p0 = DevelopmentTools.createBalancedPhylogeny( 6, 5 );