From 674f7858341235991a8d0eda5f55a20243944832 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 14 Dec 2011 08:11:21 +0000 Subject: [PATCH] mb parsing --- .../src/org/forester/application/surfacing.java | 5 ++- .../src/org/forester/io/parsers/nhx/NHXParser.java | 19 +++++++--- .../src/org/forester/surfacing/SurfacingUtil.java | 10 ++++- forester/java/src/org/forester/test/Test.java | 40 +++++++++++++++++++- 4 files changed, 64 insertions(+), 10 deletions(-) diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index f878fa6..4218c4b 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -99,6 +99,7 @@ import org.forester.util.ForesterUtil; public class surfacing { + private static final int MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING = 1000; public final static String DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS = "graph_analysis_out"; public final static String DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_dc.dot"; public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_fitch_present_dc.dot"; @@ -2076,7 +2077,7 @@ public class surfacing { String my_outfile = output_file.toString(); Map split_writers = null; Writer writer = null; - if ( similarities.size() > 1000 ) { + if ( similarities.size() > MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING ) { if ( my_outfile.endsWith( ".html" ) ) { my_outfile = my_outfile.substring( 0, my_outfile.length() - 5 ); } @@ -2429,7 +2430,7 @@ public class surfacing { split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile + "_domains_Z.html" ) ) ); split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile - + "_09.html" ) ) ); + + "_domains_0.html" ) ) ); } private static void printOutPercentageOfMultidomainProteins( final SortedMap all_genomes_domains_per_potein_histo, diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index 47771ce..e227fee 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -256,6 +256,7 @@ public final class NHXParser implements PhylogenyParser { boolean in_comment = false; boolean saw_colon = false; boolean saw_open_bracket = false; + boolean in_open_bracket = false; boolean in_double_quote = false; boolean in_single_quote = false; setPhylogenies( new ArrayList() ); @@ -300,6 +301,9 @@ public final class NHXParser implements PhylogenyParser { saw_colon = false; } } + if ( in_open_bracket && c == ']' ) { + in_open_bracket = false; + } // \n\t is always ignored, // as is " (34) and ' (39) (space is 32): if ( ( isIgnoreQuotes() && ( ( c < 33 ) || ( c > 126 ) || ( c == 34 ) || ( c == 39 ) || ( ( getCladeLevel() == 0 ) && ( c == ';' ) ) ) ) @@ -338,6 +342,7 @@ public final class NHXParser implements PhylogenyParser { } else if ( c == '[' ) { saw_open_bracket = true; + in_open_bracket = true; } else if ( saw_open_bracket ) { if ( c != ']' ) { @@ -356,13 +361,13 @@ public final class NHXParser implements PhylogenyParser { // comment consisting just of "[]": saw_open_bracket = false; } - else if ( c == '(' ) { + else if ( c == '(' && !in_open_bracket ) { processOpenParen(); } - else if ( c == ')' ) { + else if ( c == ')' && !in_open_bracket ) { processCloseParen(); } - else if ( c == ',' ) { + else if ( c == ',' && !in_open_bracket ) { processComma(); } else { @@ -631,6 +636,8 @@ public final class NHXParser implements PhylogenyParser { final PhylogenyNode node_to_annotate, final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction, final boolean replace_underscores ) throws NHXFormatException { + System.out.println( s ); + System.out.println(); if ( ( taxonomy_extraction != PhylogenyMethods.TAXONOMY_EXTRACTION.NO ) && replace_underscores ) { throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" ); } @@ -651,7 +658,7 @@ public final class NHXParser implements PhylogenyParser { b = ""; is_nhx = true; if ( cb < 0 ) { - throw new NHXFormatException( "error in NHX formatted data: no closing \"]\"" ); + throw new NHXFormatException( "error in NHX formatted data: no closing \"]\" in \"" + s + "\"" ); } if ( s.indexOf( "&&NHX" ) == ( ob + 1 ) ) { b = s.substring( ob + 6, cb ); @@ -671,7 +678,7 @@ public final class NHXParser implements PhylogenyParser { } } t = new StringTokenizer( s, ":" ); - if ( t.countTokens() >= 1 ) { + if ( t.countTokens() > 0 ) { if ( !s.startsWith( ":" ) ) { node_to_annotate.setName( t.nextToken() ); if ( !replace_underscores @@ -690,6 +697,8 @@ public final class NHXParser implements PhylogenyParser { } while ( t.hasMoreTokens() ) { s = t.nextToken(); + System.out.println( "=>" + s ); + System.out.println(); if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.SPECIES_NAME ) ) { if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) { node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() ); diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index ddc6602..bd27b56 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -2147,10 +2147,16 @@ public final class SurfacingUtil { case SIMPLE_TAB_DELIMITED: break; case HTML: - for( final Writer w : split_writers.values() ) { + for( final Character key : split_writers.keySet() ) { + final Writer w = split_writers.get( key ); w.write( "" ); w.write( SurfacingConstants.NL ); - addHtmlHead( w, "SURFACING :: " + html_title ); + if ( key != '_' ) { + addHtmlHead( w, "DCs (" + html_title + ") " + key.toString().toUpperCase() ); + } + else { + addHtmlHead( w, "DCs (" + html_title + ")" ); + } w.write( SurfacingConstants.NL ); w.write( "" ); w.write( SurfacingConstants.NL ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 723ace8..c0b1937 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -230,6 +230,15 @@ public final class Test { System.out.println( "failed." ); failed++; } + System.out.print( "NHX parsing (MrBayes): " ); + if ( Test.testNHXParsingMB() ) { + System.out.println( "OK." ); + succeeded++; + } + else { + System.out.println( "failed." ); + failed++; + } System.out.print( "Nexus characters parsing: " ); if ( Test.testNexusCharactersParsing() ) { System.out.println( "OK." ); @@ -4867,7 +4876,7 @@ public final class Test { if ( !p2[ 0 ].toNewHampshireX().equals( p2_S ) ) { return false; } - final String p2b_S = "(((((((A:0.2[&NHX:S=qwerty]):0.2[&:S=uiop]):0.3[&NHX:S=asdf]):0.4[S=zxc]):0.5[]):0.6[&&NH:S=asd]):0.7[&&HX:S=za]):0.8[&&:S=zaq]"; + final String p2b_S = "(((((((A:0.2[&NHX:S=qw,erty]):0.2[&:S=u(io)p]):0.3[&NHX:S=asdf]):0.4[S=zxc]):0.5[]):0.6[&&NH:S=asd]):0.7[&&HX:S=za]):0.8[&&:S=zaq]"; final Phylogeny[] p2b = factory.create( p2b_S, new NHXParser() ); if ( !p2b[ 0 ].toNewHampshireX().equals( "(((((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8" ) ) { return false; @@ -5026,6 +5035,35 @@ public final class Test { return true; } + private static boolean testNHXParsingMB() { + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final Phylogeny p1 = factory.create( "(1[&prob=1.000000000000000e+00,prob_stddev=0.000000000000000e+00," + + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + + "prob+-sd=\"100+-0\"]:4.129000000000000e-02[&length_mean=4.153987461671767e-02," + + "length_median=4.129000000000000e-02,length_95%HPD={3.217800000000000e-02," + + "5.026800000000000e-02}],2[&prob=1.000000000000000e+00,prob_stddev=0.000000000000000e+00," + + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\"," + + "prob+-sd=\"100+-0\"]:6.375699999999999e-02[&length_mean=6.395210411945065e-02," + + "length_median=6.375699999999999e-02,length_95%HPD={5.388600000000000e-02," + + "7.369400000000000e-02}])", new NHXParser() )[ 0 ]; + if ( !isEqual( p1.getNode( "1" ).getDistanceToParent(), 4.129e-02 ) ) { + System.out.println( p1.getNode( "1" ).getDistanceToParent() ); + System.exit( -1 ); + return false; + } + // if ( !p1.toNewHampshireX().equals( "(A[&&NHX:S=a_species],B1[&&NHX:S=b_species])" ) ) { + // return false; + // } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + System.exit( -1 ); + return false; + } + return true; + } + private static boolean testPhylogenyBranch() { try { final PhylogenyNode a1 = PhylogenyNode.createInstanceFromNhxString( "a" ); -- 1.7.10.2