From 7a7a89b113566b2749f1e8e4fbb3064241145201 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Thu, 4 Apr 2013 06:19:52 +0000 Subject: [PATCH] bugfix --- .../java/src/org/forester/application/rio.java | 4 +- .../src/org/forester/io/parsers/nhx/NHXParser.java | 2 +- .../org/forester/io/parsers/util/ParserUtils.java | 17 ++--- .../org/forester/io/writers/SequenceWriter.java | 27 +------ forester/java/src/org/forester/rio/RIO.java | 4 +- forester/java/src/org/forester/rio/TestRIO.java | 4 +- forester/java/src/org/forester/test/Test.java | 78 +++++++++++++++++--- 7 files changed, 85 insertions(+), 51 deletions(-) diff --git a/forester/java/src/org/forester/application/rio.java b/forester/java/src/org/forester/application/rio.java index 1b9687e..4f90ca0 100644 --- a/forester/java/src/org/forester/application/rio.java +++ b/forester/java/src/org/forester/application/rio.java @@ -319,13 +319,13 @@ public class rio { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE ); + nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( false ); nex.setIgnoreQuotes( true ); - nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE ); + nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } else { throw new RuntimeException( "unknown parser type: " + p ); diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index 8846374..0172cce 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -711,6 +711,6 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } public static enum TAXONOMY_EXTRACTION { - NO, AGRESSIVE, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT; + NO, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT; } } diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index 4d74229..02ed252 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -254,8 +254,7 @@ public final class ParserUtils { } } } - if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) - || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) ) { + if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) ) { final Matcher m1 = TAXOMONY_CODE_PATTERN_1.matcher( name ); if ( m1.matches() ) { return name; @@ -279,6 +278,9 @@ public final class ParserUtils { public final static String extractTaxonomyDataFromNodeName( final PhylogenyNode node, final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction ) throws PhyloXmlDataFormatException { + if ( taxonomy_extraction == TAXONOMY_EXTRACTION.NO ) { + throw new IllegalArgumentException(); + } final String id = extractUniprotTaxonomyIdFromNodeName( node.getName(), taxonomy_extraction ); if ( !ForesterUtil.isEmpty( id ) ) { if ( !node.getNodeData().isHasTaxonomy() ) { @@ -301,8 +303,7 @@ public final class ParserUtils { return code; } } - else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) - || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) ) { + else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) ) { final String sn = extractScientificNameFromNodeName( node.getName() ); if ( !ForesterUtil.isEmpty( sn ) ) { if ( !node.getNodeData().isHasTaxonomy() ) { @@ -321,7 +322,7 @@ public final class ParserUtils { public final static String extractUniprotTaxonomyIdFromNodeName( final String name, final TAXONOMY_EXTRACTION taxonomy_extraction ) { if ( ( name.indexOf( "_" ) > 0 ) - && ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) || ( name.indexOf( "/" ) > 4 ) ) ) { + && ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) || ( name.indexOf( "/" ) > 4 ) ) ) { final String[] s = name.split( "[_\\s]" ); if ( s.length > 1 ) { final String str = s[ 1 ]; @@ -345,12 +346,6 @@ public final class ParserUtils { } } } - if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) { - final Matcher m1 = TAXOMONY_UNIPROT_ID_PATTERN_1.matcher( name ); - if ( m1.matches() ) { - return name; - } - } return null; } diff --git a/forester/java/src/org/forester/io/writers/SequenceWriter.java b/forester/java/src/org/forester/io/writers/SequenceWriter.java index b8e80d2..8e6ef95 100644 --- a/forester/java/src/org/forester/io/writers/SequenceWriter.java +++ b/forester/java/src/org/forester/io/writers/SequenceWriter.java @@ -15,28 +15,7 @@ public class SequenceWriter { } public static StringBuilder toFasta( final Sequence seq, final int width ) { - final StringBuilder sb = new StringBuilder(); - sb.append( ">" ); - sb.append( seq.getIdentifier().toString() ); - sb.append( ForesterUtil.LINE_SEPARATOR ); - if ( ( width < 1 ) || ( width >= seq.getLength() ) ) { - sb.append( seq.getMolecularSequence() ); - } - else { - final int lines = seq.getLength() / width; - final int rest = seq.getLength() - ( lines * width ); - for( int i = 0; i < lines; ++i ) { - sb.append( seq.getMolecularSequence(), i * width, width ); - if ( i < ( lines - 1 ) ) { - sb.append( ForesterUtil.LINE_SEPARATOR ); - } - } - if ( rest > 0 ) { - sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( seq.getMolecularSequence(), lines * width, rest ); - } - } - return sb; + return toFasta( seq.getIdentifier(), seq.getMolecularSequenceAsString(), width ); } public static StringBuilder toFasta( final String name, final String mol_seq, final int width ) { @@ -51,14 +30,14 @@ public class SequenceWriter { final int lines = mol_seq.length() / width; final int rest = mol_seq.length() - ( lines * width ); for( int i = 0; i < lines; ++i ) { - sb.append( mol_seq, i * width, width ); + sb.append( mol_seq, i * width, ( i + 1 ) * width ); if ( i < ( lines - 1 ) ) { sb.append( ForesterUtil.LINE_SEPARATOR ); } } if ( rest > 0 ) { sb.append( ForesterUtil.LINE_SEPARATOR ); - sb.append( mol_seq, lines * width, rest ); + sb.append( mol_seq, lines * width, mol_seq.length() ); } } return sb; diff --git a/forester/java/src/org/forester/rio/RIO.java b/forester/java/src/org/forester/rio/RIO.java index b19d327..45d5633 100644 --- a/forester/java/src/org/forester/rio/RIO.java +++ b/forester/java/src/org/forester/rio/RIO.java @@ -901,13 +901,13 @@ public final class RIO { final NHXParser nhx = ( NHXParser ) p; nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE ); + nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } else if ( p instanceof NexusPhylogeniesParser ) { final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p; nex.setReplaceUnderscores( false ); nex.setIgnoreQuotes( true ); - nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE ); + nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); } return factory.create( gene_trees_file, p ); } diff --git a/forester/java/src/org/forester/rio/TestRIO.java b/forester/java/src/org/forester/rio/TestRIO.java index 9cacc29..03a72cd 100644 --- a/forester/java/src/org/forester/rio/TestRIO.java +++ b/forester/java/src/org/forester/rio/TestRIO.java @@ -48,7 +48,7 @@ public final class TestRIO { final NHXParser nhx = new NHXParser(); nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE ); + nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); // final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);"; final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx ); @@ -740,7 +740,7 @@ public final class TestRIO { final NHXParser nhx = new NHXParser(); nhx.setReplaceUnderscores( false ); nhx.setIgnoreQuotes( true ); - nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE ); + nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);" + "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));" + "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);"; diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 4ba7758..0a9066a 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -57,6 +57,7 @@ import org.forester.io.parsers.phyloxml.PhyloXmlParser; import org.forester.io.parsers.tol.TolParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; +import org.forester.io.writers.SequenceWriter; import org.forester.msa.BasicMsa; import org.forester.msa.Mafft; import org.forester.msa.Msa; @@ -173,6 +174,15 @@ public final class Test { System.exit( -1 ); } final long start_time = new Date().getTime(); + System.out.print( "Sequence writer: " ); + if ( testSequenceWriter() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Sequence id parsing: " ); if ( testSequenceIdParsing() ) { System.out.println( "OK." ); @@ -1145,7 +1155,7 @@ public final class Test { .equals( "MOUSE" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.AGRESSIVE ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } @@ -1156,7 +1166,7 @@ public final class Test { if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) != null ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.AGRESSIVE ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) .equals( "MOUSE" ) ) { return false; } @@ -6235,12 +6245,6 @@ public final class Test { System.out.println( n1.toString() ); return false; } - final PhylogenyNode n2 = PhylogenyNode - .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE ); - if ( !n2.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { - System.out.println( n2.toString() ); - return false; - } final PhylogenyNode n2x = PhylogenyNode .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n2x.getNodeData().isHasTaxonomy() ) { @@ -6649,7 +6653,7 @@ public final class Test { return false; } final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE ); + .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n13.getName().equals( "blah_12345/1-2" ) ) { return false; } @@ -6742,6 +6746,31 @@ public final class Test { if ( n32.getNodeData().isHasTaxonomy() ) { return false; } + final PhylogenyNode n40 = PhylogenyNode + .createInstanceFromNhxString( "bcl2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { + return false; + } + final PhylogenyNode n41 = PhylogenyNode + .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n41.getNodeData().isHasTaxonomy() ) { + return false; + } + final PhylogenyNode n42 = PhylogenyNode + .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ); + if ( n42.getNodeData().isHasTaxonomy() ) { + return false; + } + final PhylogenyNode n43 = PhylogenyNode.createInstanceFromNhxString( "12345", + NHXParser.TAXONOMY_EXTRACTION.NO ); + if ( n43.getNodeData().isHasTaxonomy() ) { + return false; + } + final PhylogenyNode n44 = PhylogenyNode + .createInstanceFromNhxString( "12345~1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( n44.getNodeData().isHasTaxonomy() ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -9552,6 +9581,37 @@ public final class Test { return true; } + private static boolean testSequenceWriter() { + try { + final String n = ForesterUtil.LINE_SEPARATOR; + if ( !SequenceWriter.toFasta( "name", "awes", 5 ).toString().equals( ">name" + n + "awes" ) ) { + return false; + } + if ( !SequenceWriter.toFasta( "name", "awes", 4 ).toString().equals( ">name" + n + "awes" ) ) { + return false; + } + if ( !SequenceWriter.toFasta( "name", "awes", 3 ).toString().equals( ">name" + n + "awe" + n + "s" ) ) { + return false; + } + if ( !SequenceWriter.toFasta( "name", "awes", 2 ).toString().equals( ">name" + n + "aw" + n + "es" ) ) { + return false; + } + if ( !SequenceWriter.toFasta( "name", "awes", 1 ).toString() + .equals( ">name" + n + "a" + n + "w" + n + "e" + n + "s" ) ) { + return false; + } + if ( !SequenceWriter.toFasta( "name", "abcdefghij", 3 ).toString() + .equals( ">name" + n + "abc" + n + "def" + n + "ghi" + n + "j" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace(); + return false; + } + return true; + } + private static boolean testCreateBalancedPhylogeny() { try { final Phylogeny p0 = DevelopmentTools.createBalancedPhylogeny( 6, 5 ); -- 1.7.10.2