inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 9 Apr 2013 02:30:25 +0000 (02:30 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 9 Apr 2013 02:30:25 +0000 (02:30 +0000)
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/test/Test.java

index 975e558..92275ad 100644 (file)
@@ -58,10 +58,8 @@ public final class ParserUtils {
     final public static String   TAX_CODE                       = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|CAP";
     final public static Pattern  TAXOMONY_SN_PATTERN            = Pattern
                                                                         .compile( "[A-Z0-9]{2,}_([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)\\b" );
-    final public static Pattern  TAXOMONY_CODE_PATTERN_R1       = Pattern.compile( "[A-Z0-9]+_(" + TAX_CODE
-                                                                        + ")(?:\\b|_)" );
-    final public static Pattern  TAXOMONY_CODE_PATTERN_R2       = Pattern.compile( "(?:\\b|_)(" + TAX_CODE
-                                                                        + ")(?:\\b|_)" );
+    final public static Pattern  TAXOMONY_CODE_PATTERN_R1       = Pattern.compile( "[A-Z0-9]+_(" + TAX_CODE + ")\\b" );
+    final public static Pattern  TAXOMONY_CODE_PATTERN_R2       = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")\\b" );
     final private static Pattern TAXOMONY_CODE_PATTERN_PF       = Pattern.compile( "[A-Z0-9]{2,}_(" + TAX_CODE
                                                                         + ")/\\d+-\\d+" );
     final public static Pattern  TAXOMONY_CODE_PATTERN_4        = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
index dc1d14f..cbd304d 100644 (file)
@@ -1148,11 +1148,11 @@ public final class Test {
             if ( ParserUtils.extractTaxonomyCodeFromNodeName( "RAT1", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " _MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " _MOUSE", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "MOUSE" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " _SOYBN_", TAXONOMY_EXTRACTION.AGGRESSIVE )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( " _SOYBN", TAXONOMY_EXTRACTION.AGGRESSIVE )
                     .equals( "SOYBN" ) ) {
                 return false;
             }
@@ -1180,7 +1180,7 @@ public final class Test {
                     .equals( "SOYBN" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "SOYBN" ) ) {
                 return false;
             }
@@ -1188,7 +1188,7 @@ public final class Test {
                     .equals( "SOYBN" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN_qwerty", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "SOYBN qwerty", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "SOYBN" ) ) {
                 return false;
             }
@@ -1203,7 +1203,7 @@ public final class Test {
             if ( ParserUtils.extractTaxonomyCodeFromNodeName( "xxxSOYBNxxx", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "-SOYBN_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "-SOYBN~", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "SOYBN" ) ) {
                 return false;
             }
@@ -1221,7 +1221,7 @@ public final class Test {
                     .equals( "MOUSE" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE_function = 23445",
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_MOUSE+function = 23445",
                                                                TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "MOUSE" ) ) {
                 return false;
@@ -1243,7 +1243,7 @@ public final class Test {
                                                                TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT_function = 23445",
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "BCL2_RAT function = 23445",
                                                                TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ).equals( "RAT" ) ) {
                 return false;
             }
@@ -1276,18 +1276,18 @@ public final class Test {
                     .equals( "MOUSE" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE ", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "MOUSE" ) ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE^", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "MOUSE" ) ) {
                 return false;
             }
-            if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) != null ) {
+            if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE*", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) != null ) {
                 return false;
             }
-            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+            if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE=x", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
                     .equals( "MOUSE" ) ) {
                 return false;
             }