inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 4 Apr 2014 21:59:42 +0000 (21:59 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 4 Apr 2014 21:59:42 +0000 (21:59 +0000)
forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java
forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/test/Test.java

index 184e35e..b08fa24 100644 (file)
@@ -245,7 +245,7 @@ public class UrlTreeReader implements Runnable {
                 try {
                     JOptionPane.showMessageDialog( null,
                                                    ForesterUtil.wordWrap( "Successfully read in " + trees.length
-                                                           + " evolutionry tree(s) from [" + url + "]", 80 ),
+                                                           + " tree(s) from [" + url + "]", 80 ),
                                                    "Success",
                                                    JOptionPane.INFORMATION_MESSAGE );
                 }
@@ -254,6 +254,10 @@ public class UrlTreeReader implements Runnable {
                 }
                 _main_frame.getContentPane().repaint();
             }
+            else {
+                JOptionPane.showMessageDialog( null, ForesterUtil.wordWrap( "Failed to read in tree(s) from [" + url
+                        + "]", 80 ), "Error", JOptionPane.ERROR_MESSAGE );
+            }
         }
         _main_frame.activateSaveAllIfNeeded();
         System.gc();
index fbbce85..0f21364 100644 (file)
@@ -149,16 +149,16 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P
                                         final boolean is_rooted ) throws IOException {
         _next = null;
         final NHXParser pars = new NHXParser();
-        if ( ( _taxlabels.size() < 1 ) && ( _translate_map.size() < 1 ) ) {
-            pars.setTaxonomyExtraction( _taxonomy_extraction );
-            pars.setReplaceUnderscores( _replace_underscores );
-            pars.setIgnoreQuotes( _ignore_quotes_in_nh_data );
-        }
-        else {
-            pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
-            pars.setReplaceUnderscores( false );
-            pars.setIgnoreQuotes( false );
-        }
+        // if ( ( _taxlabels.size() < 1 ) && ( _translate_map.size() < 1 ) ) {
+        pars.setTaxonomyExtraction( _taxonomy_extraction );
+        pars.setReplaceUnderscores( _replace_underscores );
+        pars.setIgnoreQuotes( _ignore_quotes_in_nh_data );
+        //}
+        //else {
+        //    pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
+        //    pars.setReplaceUnderscores( false );
+        //    pars.setIgnoreQuotes( false );
+        //}
         if ( rooted_info_present ) {
             pars.setGuessRootedness( false );
         }
index a465d4a..76f2a18 100644 (file)
@@ -62,11 +62,13 @@ public final class ParserUtils {
                                                                          + TAX_CODE + ")\\b" );
     final public static Pattern  TAXOMONY_SN_PATTERN             = Pattern
                                                                          .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)\\b" );
+    final public static Pattern  TAXOMONY_SN_PATTERN_SN          = Pattern
+                                                                         .compile( "\\b([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)(?:\\b|_)" );
     final private static Pattern TAXOMONY_CODE_PATTERN_PFS       = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_("
                                                                          + TAX_CODE + ")/\\d+-\\d+\\b" );
-    final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A   = Pattern.compile( "(?:\\b|_)(\\d{1,7})\\b" );
+    // final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A   = Pattern.compile( "(?:\\b|(?:[A-Z]_))(\\d{1,7})\\b" );
     final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern
-                                                                         .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(\\d{1,7})\\b" );
+                                                                         .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" );
     final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern
                                                                          .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" );
 
@@ -199,6 +201,10 @@ public final class ParserUtils {
         if ( m.find() ) {
             return m.group( 1 ).replace( '_', ' ' );
         }
+        final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name );
+        if ( m_sn.find() ) {
+            return m_sn.group( 1 ).replace( '_', ' ' );
+        }
         return null;
     }
 
@@ -273,12 +279,12 @@ public final class ParserUtils {
             if ( m.find() ) {
                 return m.group( 1 );
             }
-            else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
-                m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name );
-                if ( m.find() ) {
-                    return m.group( 1 );
-                }
-            }
+            //else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) {
+            //    m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name );
+            //    if ( m.find() ) {
+            //        return m.group( 1 );
+            //    }
+            //}
         }
         return null;
     }
index 80a20b6..4023539 100644 (file)
@@ -4066,6 +4066,16 @@ public final class Test {
                     .equals( "Mus musculus" ) ) {
                 return false;
             }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus" )
+                    .equals( "Mus musculus musculus" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) {
+                return false;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );
@@ -8421,8 +8431,8 @@ public final class Test {
                 return false;
             }
             final PhylogenyNode n13 = PhylogenyNode
-                    .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
-            if ( !n13.getName().equals( "blah_12345/1-2" ) ) {
+                    .createInstanceFromNhxString( "BLAH_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+            if ( !n13.getName().equals( "BLAH_12345/1-2" ) ) {
                 return false;
             }
             if ( PhylogenyMethods.getSpecies( n13 ).equals( "12345" ) ) {
@@ -8487,7 +8497,7 @@ public final class Test {
                 return false;
             }
             final PhylogenyNode n19 = PhylogenyNode
-                    .createInstanceFromNhxString( "blah_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAH_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( !n19.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) {
                 return false;
             }
@@ -8495,7 +8505,7 @@ public final class Test {
                 return false;
             }
             final PhylogenyNode n30 = PhylogenyNode
-                    .createInstanceFromNhxString( "blah_1234567-roejojoej",
+                    .createInstanceFromNhxString( "BLAH_1234567-roejojoej",
                                                   NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( !n30.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1234567" ) ) {
                 return false;
@@ -8504,7 +8514,7 @@ public final class Test {
                 return false;
             }
             final PhylogenyNode n31 = PhylogenyNode
-                    .createInstanceFromNhxString( "blah_12345678-roejojoej",
+                    .createInstanceFromNhxString( "BLAH_12345678-roejojoej",
                                                   NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( n31.getNodeData().isHasTaxonomy() ) {
                 return false;
@@ -8515,7 +8525,7 @@ public final class Test {
                 return false;
             }
             final PhylogenyNode n40 = PhylogenyNode
-                    .createInstanceFromNhxString( "bcl2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BCL2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
                 return false;
             }
@@ -11576,7 +11586,7 @@ public final class Test {
                 return false;
             }
             final PhylogenyNode n3 = PhylogenyNode
-                    .createInstanceFromNhxString( "blag_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAG_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( !n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
                 System.out.println( n3.toString() );
                 return false;
@@ -11594,43 +11604,43 @@ public final class Test {
                 return false;
             }
             final PhylogenyNode n6 = PhylogenyNode
-                    .createInstanceFromNhxString( "blag-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAG-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( n6.getNodeData().isHasTaxonomy() ) {
                 System.out.println( n6.toString() );
                 return false;
             }
             final PhylogenyNode n7 = PhylogenyNode
-                    .createInstanceFromNhxString( "blag-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAG-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( n7.getNodeData().isHasTaxonomy() ) {
                 System.out.println( n7.toString() );
                 return false;
             }
             final PhylogenyNode n8 = PhylogenyNode
-                    .createInstanceFromNhxString( "blag_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAG_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
                 System.out.println( n8.toString() );
                 return false;
             }
             final PhylogenyNode n9 = PhylogenyNode
-                    .createInstanceFromNhxString( "blag_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAG_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
                 System.out.println( n9.toString() );
                 return false;
             }
             final PhylogenyNode n10x = PhylogenyNode
-                    .createInstanceFromNhxString( "blag_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAG_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( n10x.getNodeData().isHasTaxonomy() ) {
                 System.out.println( n10x.toString() );
                 return false;
             }
             final PhylogenyNode n10xx = PhylogenyNode
-                    .createInstanceFromNhxString( "blag_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAG_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( n10xx.getNodeData().isHasTaxonomy() ) {
                 System.out.println( n10xx.toString() );
                 return false;
             }
             final PhylogenyNode n10 = PhylogenyNode
-                    .createInstanceFromNhxString( "blag_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+                    .createInstanceFromNhxString( "BLAG_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
             if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) {
                 System.out.println( n10.toString() );
                 return false;