in progress
[jalview.git] / forester / java / src / org / forester / io / parsers / nhx / NHXParser.java
index 600f00f..99a78bf 100644 (file)
@@ -39,6 +39,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.parsers.util.PhylogenyParserException;
 import org.forester.phylogeny.Phylogeny;
@@ -60,40 +61,34 @@ import org.forester.util.ForesterUtil;
 
 public final class NHXParser implements PhylogenyParser {
 
-    public static final boolean                              LIMIT_SPECIES_NAMES_TO_FIVE_CHARS = true;
-    public static final PhylogenyMethods.TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT       = PhylogenyMethods.TAXONOMY_EXTRACTION.NO;
-    final static private boolean                             GUESS_ROOTEDNESS_DEFAULT          = true;
-    final static private boolean                             GUESS_IF_SUPPORT_VALUES           = true;
-    final static private boolean                             IGNORE_QUOTES_DEFAULT             = false;
-    final static public boolean                              REPLACE_UNDERSCORES_DEFAULT       = false;
-    private boolean                                          _saw_closing_paren;
-    final static private byte                                STRING                            = 0;
-    final static private byte                                STRING_BUFFER                     = 1;
-    final static private byte                                CHAR_ARRAY                        = 2;
-    final static private byte                                BUFFERED_READER                   = 3;
-    private boolean                                          _guess_rootedness;
-    private boolean                                          _has_next;
-    private boolean                                          _ignore_quotes;
-    private byte                                             _input_type;
-    private int                                              _source_length;
-    private PhylogenyNode                                    _current_node;
-    private StringBuilder                                    _current_anotation;
-    private Object                                           _nhx_source;
-    private int                                              _clade_level;
-    private List<Phylogeny>                                  _phylogenies;
-    private Phylogeny                                        _current_phylogeny;
-    private PhylogenyMethods.TAXONOMY_EXTRACTION             _taxonomy_extraction;
-    private boolean                                          _replace_underscores;
-    public final static Pattern                              UC_LETTERS_NUMBERS_PATTERN        = Pattern
-                                                                                                       .compile( "^[A-Z0-9]+$" );
-    public final static Pattern                              NUMBERS_ONLY_PATTERN              = Pattern
-                                                                                                       .compile( "^[0-9\\.]+$" );
-    public final static Pattern                              MB_PROB_PATTERN                   = Pattern
-                                                                                                       .compile( "prob=([^,]+)" );
-    public final static Pattern                              MB_PROB_SD_PATTERN                = Pattern
-                                                                                                       .compile( "prob_stddev=([^,]+)" );
-    public final static Pattern                              MB_BL_PATTERN                     = Pattern
-                                                                                                       .compile( "length_median=([^,]+)" );
+    public static final TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = NHXParser.TAXONOMY_EXTRACTION.NO; // NO: parseNHX only extracts taxonomy codes for modes other than NO
+    final static private boolean            GUESS_ROOTEDNESS_DEFAULT    = true;
+    final static private boolean            GUESS_IF_SUPPORT_VALUES     = true;
+    final static private boolean            IGNORE_QUOTES_DEFAULT       = false;
+    final static public boolean             REPLACE_UNDERSCORES_DEFAULT = false;
+    private boolean                         _saw_closing_paren;
+    final static private byte               STRING                      = 0; // STRING .. BUFFERED_READER: tags for the kind of input source
+    final static private byte               STRING_BUFFER               = 1;
+    final static private byte               CHAR_ARRAY                  = 2;
+    final static private byte               BUFFERED_READER             = 3;
+    private boolean                         _guess_rootedness;
+    private boolean                         _has_next;
+    private boolean                         _ignore_quotes;
+    private byte                            _input_type; // presumably holds one of the byte tags above -- TODO confirm
+    private int                             _source_length;
+    private PhylogenyNode                   _current_node;
+    private StringBuilder                   _current_anotation; // sic ("anotation"); presumably the text handed to parseNHX via getCurrentAnotation() -- TODO confirm
+    private Object                          _nhx_source;
+    private int                             _clade_level;
+    private List<Phylogeny>                 _phylogenies;
+    private Phylogeny                       _current_phylogeny;
+    private NHXParser.TAXONOMY_EXTRACTION   _taxonomy_extraction; // read and written by getTaxonomyExtraction() / setTaxonomyExtraction()
+    private boolean                         _replace_underscores;
+    public final static Pattern             UC_LETTERS_NUMBERS_PATTERN  = Pattern.compile( "^[A-Z0-9]+$" ); // whole string is upper-case ASCII letters and/or digits
+    public final static Pattern             NUMBERS_ONLY_PATTERN        = Pattern.compile( "^[0-9\\.]+$" ); // whole string is digits and/or dots (does not require a valid number)
+    public final static Pattern             MB_PROB_PATTERN             = Pattern.compile( "prob=([^,]+)" ); // group 1: text after "prob=" up to the next comma
+    public final static Pattern             MB_PROB_SD_PATTERN          = Pattern.compile( "prob_stddev=([^,]+)" ); // group 1: text after "prob_stddev=" up to the next comma
+    public final static Pattern             MB_BL_PATTERN               = Pattern.compile( "length_median=([^,]+)" ); // group 1: text after "length_median=" up to the next comma
 
     public NHXParser() {
         init();
@@ -118,8 +113,9 @@ public final class NHXParser implements PhylogenyParser {
      * 
      * @throws PhylogenyParserException
      * @throws NHXFormatException
+     * @throws PhyloXmlDataFormatException 
      */
-    private void finishPhylogeny() throws PhylogenyParserException, NHXFormatException {
+    private void finishPhylogeny() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException {
         setCladeLevel( 0 );
         if ( getCurrentPhylogeny() != null ) {
             parseNHX( getCurrentAnotation().toString(),
@@ -142,7 +138,8 @@ public final class NHXParser implements PhylogenyParser {
         }
     }
 
-    private void finishSingleNodePhylogeny() throws PhylogenyParserException, NHXFormatException {
+    private void finishSingleNodePhylogeny() throws PhylogenyParserException, NHXFormatException,
+            PhyloXmlDataFormatException {
         setCladeLevel( 0 );
         final PhylogenyNode new_node = new PhylogenyNode();
         parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() );
@@ -196,7 +193,7 @@ public final class NHXParser implements PhylogenyParser {
         return _source_length;
     }
 
-    public PhylogenyMethods.TAXONOMY_EXTRACTION getTaxonomyExtraction() {
+    public NHXParser.TAXONOMY_EXTRACTION getTaxonomyExtraction() { // returns the currently stored taxonomy extraction mode
         return _taxonomy_extraction;
     }
 
@@ -270,6 +267,8 @@ public final class NHXParser implements PhylogenyParser {
         setPhylogenies( new ArrayList<Phylogeny>() );
         setCladeLevel( 0 );
         newCurrentAnotation();
+        setCurrentPhylogeny( null );
+        setCurrentNode( null );
         int i = 0;
         while ( true ) {
             char c = '\b';
@@ -399,17 +398,14 @@ public final class NHXParser implements PhylogenyParser {
         return getPhylogeniesAsArray();
     } // parse()
 
-    public Phylogeny parseNext() throws IOException, NHXFormatException {
-        return null;
-    }
-
     /**
      * Called if a closing paren is encountered.
      * 
      * @throws PhylogenyParserException
      * @throws NHXFormatException
+     * @throws PhyloXmlDataFormatException 
      */
-    private void processCloseParen() throws PhylogenyParserException, NHXFormatException {
+    private void processCloseParen() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException {
         decreaseCladeLevel();
         if ( !isSawClosingParen() ) {
             final PhylogenyNode new_node = new PhylogenyNode();
@@ -435,8 +431,9 @@ public final class NHXParser implements PhylogenyParser {
      * 
      * @throws PhylogenyParserException
      * @throws NHXFormatException
+     * @throws PhyloXmlDataFormatException 
      */
-    private void processComma() throws PhylogenyParserException, NHXFormatException {
+    private void processComma() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException {
         if ( !isSawClosingParen() ) {
             final PhylogenyNode new_node = new PhylogenyNode();
             parseNHX( getCurrentAnotation().toString(), new_node, getTaxonomyExtraction(), isReplaceUnderscores() );
@@ -460,8 +457,9 @@ public final class NHXParser implements PhylogenyParser {
      * 
      * @throws PhylogenyParserException
      * @throws NHXFormatException
+     * @throws PhyloXmlDataFormatException 
      */
-    private void processOpenParen() throws PhylogenyParserException, NHXFormatException {
+    private void processOpenParen() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException {
         final PhylogenyNode new_node = new PhylogenyNode();
         if ( getCladeLevel() == 0 ) {
             if ( getCurrentPhylogeny() != null ) {
@@ -602,7 +600,7 @@ public final class NHXParser implements PhylogenyParser {
         _source_length = source_length;
     }
 
-    public void setTaxonomyExtraction( final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction ) {
+    public void setTaxonomyExtraction( final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction ) { // stores the mode; parseNHX throws IllegalArgumentException if it is not NO while underscores are replaced
         _taxonomy_extraction = taxonomy_extraction;
     }
 
@@ -611,7 +609,7 @@ public final class NHXParser implements PhylogenyParser {
             return Double.valueOf( str ).doubleValue();
         }
         catch ( final NumberFormatException ex ) {
-            throw new NHXFormatException( "error in NH/NHX formatted data: failed to parse number from :" + "\"" + str
+            throw new NHXFormatException( "error in NH/NHX formatted data: failed to parse number from " + "\"" + str
                     + "\"" );
         }
     }
@@ -642,9 +640,10 @@ public final class NHXParser implements PhylogenyParser {
 
     public static void parseNHX( String s,
                                  final PhylogenyNode node_to_annotate,
-                                 final PhylogenyMethods.TAXONOMY_EXTRACTION taxonomy_extraction,
-                                 final boolean replace_underscores ) throws NHXFormatException {
-        if ( ( taxonomy_extraction != PhylogenyMethods.TAXONOMY_EXTRACTION.NO ) && replace_underscores ) {
+                                 final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction,
+                                 final boolean replace_underscores ) throws NHXFormatException,
+            PhyloXmlDataFormatException {
+        if ( ( taxonomy_extraction != NHXParser.TAXONOMY_EXTRACTION.NO ) && replace_underscores ) {
             throw new IllegalArgumentException( "cannot extract taxonomies and replace under scores at the same time" );
         }
         if ( ( s != null ) && ( s.length() > 0 ) ) {
@@ -684,11 +683,9 @@ public final class NHXParser implements PhylogenyParser {
                 if ( !s.startsWith( ":" ) ) {
                     node_to_annotate.setName( t.nextToken() );
                     if ( !replace_underscores
-                            && ( !is_nhx && ( taxonomy_extraction != PhylogenyMethods.TAXONOMY_EXTRACTION.NO ) ) ) {
-                        final String tax = ParserUtils
-                                .extractTaxonomyCodeFromNodeName( node_to_annotate.getName(),
-                                                                  LIMIT_SPECIES_NAMES_TO_FIVE_CHARS,
-                                                                  taxonomy_extraction );
+                            && ( !is_nhx && ( taxonomy_extraction != NHXParser.TAXONOMY_EXTRACTION.NO ) ) ) {
+                        final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node_to_annotate.getName(),
+                                                                                        taxonomy_extraction );
                         if ( !ForesterUtil.isEmpty( tax ) ) {
                             if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) {
                                 node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() );
@@ -854,4 +851,8 @@ public final class NHXParser implements PhylogenyParser {
         final int blu = ForesterUtil.limitRangeForColor( Integer.parseInt( st.nextToken() ) );
         return new Color( red, green, blu );
     }
+
+    public static enum TAXONOMY_EXTRACTION { // mode passed to parseNHX and ParserUtils.extractTaxonomyCodeFromNodeName
+        NO, YES, PFAM_STYLE_ONLY; // NO disables extraction in parseNHX; YES vs PFAM_STYLE_ONLY is decided in ParserUtils -- TODO confirm exact semantics there
+    }
 }