mb parsing
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 14 Dec 2011 08:11:21 +0000 (08:11 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Wed, 14 Dec 2011 08:11:21 +0000 (08:11 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/io/parsers/nhx/NHXParser.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java
forester/java/src/org/forester/test/Test.java

index f878fa6..4218c4b 100644 (file)
@@ -99,6 +99,7 @@ import org.forester.util.ForesterUtil;
 
 public class surfacing {
 
+    private static final int                                  MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING                           = 1000;
     public final static String                                DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS                    = "graph_analysis_out";
     public final static String                                DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS                = "_dc.dot";
     public final static String                                PARSIMONY_OUTPUT_FITCH_PRESENT_BC_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS = "_fitch_present_dc.dot";
@@ -2076,7 +2077,7 @@ public class surfacing {
             String my_outfile = output_file.toString();
             Map<Character, Writer> split_writers = null;
             Writer writer = null;
-            if ( similarities.size() > 1000 ) {
+            if ( similarities.size() > MINIMAL_NUMBER_OF_SIMILARITIES_FOR_SPLITTING ) {
                 if ( my_outfile.endsWith( ".html" ) ) {
                     my_outfile = my_outfile.substring( 0, my_outfile.length() - 5 );
                 }
@@ -2429,7 +2430,7 @@ public class surfacing {
         split_writers.put( 'z', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
                 + "_domains_Z.html" ) ) );
         split_writers.put( '0', new BufferedWriter( new FileWriter( out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile
-                + "_09.html" ) ) );
+                + "_domains_0.html" ) ) );
     }
 
     private static void printOutPercentageOfMultidomainProteins( final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
index 47771ce..e227fee 100644 (file)
@@ -256,6 +256,7 @@ public final class NHXParser implements PhylogenyParser {
         boolean in_comment = false;
         boolean saw_colon = false;
         boolean saw_open_bracket = false;
+        boolean in_open_bracket = false;
         boolean in_double_quote = false;
         boolean in_single_quote = false;
         setPhylogenies( new ArrayList<Phylogeny>() );
@@ -300,6 +301,9 @@ public final class NHXParser implements PhylogenyParser {
                     saw_colon = false;
                 }
             }
+            if ( in_open_bracket && c == ']' ) {
+                in_open_bracket = false;
+            }
             // \n\t is always ignored,
             // as is " (34) and ' (39) (space is 32):
             if ( ( isIgnoreQuotes() && ( ( c < 33 ) || ( c > 126 ) || ( c == 34 ) || ( c == 39 ) || ( ( getCladeLevel() == 0 ) && ( c == ';' ) ) ) )
@@ -338,6 +342,7 @@ public final class NHXParser implements PhylogenyParser {
             }
             else if ( c == '[' ) {
                 saw_open_bracket = true;
+                in_open_bracket = true;
             }
             else if ( saw_open_bracket ) {
                 if ( c != ']' ) {
@@ -356,13 +361,13 @@ public final class NHXParser implements PhylogenyParser {
                 // comment consisting just of "[]":
                 saw_open_bracket = false;
             }
-            else if ( c == '(' ) {
+            else if ( c == '(' && !in_open_bracket ) {
                 processOpenParen();
             }
-            else if ( c == ')' ) {
+            else if ( c == ')' && !in_open_bracket ) {
                 processCloseParen();
             }
-            else if ( c == ',' ) {
+            else if ( c == ',' && !in_open_bracket ) {
                 processComma();
             }
             else {
@@ -631,6 +636,8 @@ public final class NHXParser implements PhylogenyParser {
                                  final PhylogenyNode node_to_annotate,
                                  final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction,
                                  final boolean replace_underscores ) throws NHXFormatException {
+        System.out.println( s );
+        System.out.println();
         if ( ( taxonomy_extraction != PhylogenyMethods.TAXONOMY_EXTRACTION.NO ) && replace_underscores ) {
             throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" );
         }
@@ -651,7 +658,7 @@ public final class NHXParser implements PhylogenyParser {
                 b = "";
                 is_nhx = true;
                 if ( cb < 0 ) {
-                    throw new NHXFormatException( "error in NHX formatted data: no closing \"]\"" );
+                    throw new NHXFormatException( "error in NHX formatted data: no closing \"]\" in \"" + s + "\"" );
                 }
                 if ( s.indexOf( "&&NHX" ) == ( ob + 1 ) ) {
                     b = s.substring( ob + 6, cb );
@@ -671,7 +678,7 @@ public final class NHXParser implements PhylogenyParser {
                 }
             }
             t = new StringTokenizer( s, ":" );
-            if ( t.countTokens() >= 1 ) {
+            if ( t.countTokens() > 0 ) {
                 if ( !s.startsWith( ":" ) ) {
                     node_to_annotate.setName( t.nextToken() );
                     if ( !replace_underscores
@@ -690,6 +697,8 @@ public final class NHXParser implements PhylogenyParser {
                 }
                 while ( t.hasMoreTokens() ) {
                     s = t.nextToken();
+                    System.out.println( "=>" + s );
+                    System.out.println();
                     if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.SPECIES_NAME ) ) {
                         if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) {
                             node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() );
index ddc6602..bd27b56 100644 (file)
@@ -2147,10 +2147,16 @@ public final class SurfacingUtil {
             case SIMPLE_TAB_DELIMITED:
                 break;
             case HTML:
-                for( final Writer w : split_writers.values() ) {
+                for( final Character key : split_writers.keySet() ) {
+                    final Writer w = split_writers.get( key );
                     w.write( "<html>" );
                     w.write( SurfacingConstants.NL );
-                    addHtmlHead( w, "SURFACING :: " + html_title );
+                    if ( key != '_' ) {
+                        addHtmlHead( w, "DCs (" + html_title + ") " + key.toString().toUpperCase() );
+                    }
+                    else {
+                        addHtmlHead( w, "DCs (" + html_title + ")" );
+                    }
                     w.write( SurfacingConstants.NL );
                     w.write( "<body>" );
                     w.write( SurfacingConstants.NL );
index 723ace8..c0b1937 100644 (file)
@@ -230,6 +230,15 @@ public final class Test {
             System.out.println( "failed." );
             failed++;
         }
+        System.out.print( "NHX parsing (MrBayes): " );
+        if ( Test.testNHXParsingMB() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
         System.out.print( "Nexus characters parsing: " );
         if ( Test.testNexusCharactersParsing() ) {
             System.out.println( "OK." );
@@ -4867,7 +4876,7 @@ public final class Test {
             if ( !p2[ 0 ].toNewHampshireX().equals( p2_S ) ) {
                 return false;
             }
-            final String p2b_S = "(((((((A:0.2[&NHX:S=qwerty]):0.2[&:S=uiop]):0.3[&NHX:S=asdf]):0.4[S=zxc]):0.5[]):0.6[&&NH:S=asd]):0.7[&&HX:S=za]):0.8[&&:S=zaq]";
+            final String p2b_S = "(((((((A:0.2[&NHX:S=qw,erty]):0.2[&:S=u(io)p]):0.3[&NHX:S=asdf]):0.4[S=zxc]):0.5[]):0.6[&&NH:S=asd]):0.7[&&HX:S=za]):0.8[&&:S=zaq]";
             final Phylogeny[] p2b = factory.create( p2b_S, new NHXParser() );
             if ( !p2b[ 0 ].toNewHampshireX().equals( "(((((((A:0.2):0.2):0.3):0.4):0.5):0.6):0.7):0.8" ) ) {
                 return false;
@@ -5026,6 +5035,35 @@ public final class Test {
         return true;
     }
 
+    /**
+     * Parses a two-taxon tree carrying MrBayes-style "[&...]" annotations and checks
+     * that node "1" keeps its branch length of 4.129e-02.
+     * Returns false on a mismatch or exception instead of terminating the JVM.
+     */
+    private static boolean testNHXParsingMB() {
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny p1 = factory.create( "(1[&prob=1.000000000000000e+00,prob_stddev=0.000000000000000e+00,"
+                    + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\","
+                    + "prob+-sd=\"100+-0\"]:4.129000000000000e-02[&length_mean=4.153987461671767e-02,"
+                    + "length_median=4.129000000000000e-02,length_95%HPD={3.217800000000000e-02,"
+                    + "5.026800000000000e-02}],2[&prob=1.000000000000000e+00,prob_stddev=0.000000000000000e+00,"
+                    + "prob_range={1.000000000000000e+00,1.000000000000000e+00},prob(percent)=\"100\","
+                    + "prob+-sd=\"100+-0\"]:6.375699999999999e-02[&length_mean=6.395210411945065e-02,"
+                    + "length_median=6.375699999999999e-02,length_95%HPD={5.388600000000000e-02,"
+                    + "7.369400000000000e-02}])", new NHXParser() )[ 0 ];
+            if ( !isEqual( p1.getNode( "1" ).getDistanceToParent(), 4.129e-02 ) ) {
+                System.out.println( p1.getNode( "1" ).getDistanceToParent() );
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
     private static boolean testPhylogenyBranch() {
         try {
             final PhylogenyNode a1 = PhylogenyNode.createInstanceFromNhxString( "a" );