inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 16 Feb 2013 05:27:58 +0000 (05:27 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 16 Feb 2013 05:27:58 +0000 (05:27 +0000)
forester/java/src/org/forester/archaeopteryx/MainFrame.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/test/Test.java

index 37f84e4..8079b12 100644 (file)
@@ -1121,7 +1121,7 @@ public abstract class MainFrame extends JFrame implements ActionListener {
         options.setInternalNumberAreConfidenceForNhParsing( ( _internal_number_are_confidence_for_nh_parsing_cbmi != null )
                 && _internal_number_are_confidence_for_nh_parsing_cbmi.isSelected() );
         if ( ( _extract_taxonomy_yes_rbmi != null ) && _extract_taxonomy_yes_rbmi.isSelected() ) {
-            options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+            options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.AGRESSIVE );
         }
         else if ( ( _extract_taxonomy_pfam_rbmi != null ) && _extract_taxonomy_pfam_rbmi.isSelected() ) {
             options.setTaxonomyExtractio( TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
index d5acd19..bbac039 100644 (file)
@@ -61,6 +61,8 @@ public final class ParserUtils {
                                                                         .compile( "\\b[A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP\\b" );
     final private static Pattern TAXOMONY_CODE_PATTERN_2        = Pattern
                                                                         .compile( "([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)[^0-9A-Za-z].*" );
+    final private static Pattern TAXOMONY_CODE_PATTERN_3        = Pattern
+                                                                        .compile( "_([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)_" );
     final private static Pattern TAXOMONY_CODE_PATTERN_PF       = Pattern
                                                                         .compile( "([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)/\\d+-\\d+" );
     final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_1  = Pattern.compile( "\\b\\d{1,7}\\b" );
@@ -256,6 +258,10 @@ public final class ParserUtils {
             if ( m1.matches() ) {
                 return name;
             }
+            final Matcher m3 = TAXOMONY_CODE_PATTERN_3.matcher( name );
+            if ( m3.matches() ) {
+                return m3.group( 1 );
+            }
         }
         return null;
     }
index b80a584..bd8b586 100644 (file)
@@ -897,6 +897,21 @@ public final class Test {
                     .equals( "MOUSE" ) ) {
                 return false;
             }
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.AGRESSIVE )
+                    .equals( "MOUSE" ) ) {
+                return false;
+            }
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+                    .equals( "MOUSE" ) ) {
+                return false;
+            }
+            if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) != null ) {
+                return false;
+            }
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.AGRESSIVE )
+                    .equals( "MOUSE" ) ) {
+                return false;
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace( System.out );