From d963c6703bcbb8a75b13d6d69b96a9cba9bc0479 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Wed, 1 Mar 2017 18:07:41 -0800 Subject: [PATCH] need to allow virus taxonomy codes (e.g. HHV3) --- .../org/forester/io/parsers/util/ParserUtils.java | 4 ++-- forester/java/src/org/forester/test/Test.java | 24 ++++++++++---------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index b278cb9..674713d 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -56,8 +56,8 @@ import org.forester.util.ForesterUtil; public final class ParserUtils { final private static String SN_BN = "[A-Z][a-z]{2,30}[_ ][a-z]{3,30}"; - final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|(?:[A-Z1-9]{4})"; - final public static String TAX_CODE_LO = "(?:[A-Z]{5})|RAT|PIG|PEA|(?:[A-Z1-9]{4})"; + final public static String TAX_CODE = "(?:[A-Z0-9]{3,5})"; + final public static String TAX_CODE_LO = "(?:[A-Z]{5})|RAT|PIG|PEA"; final public static Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")(?:\\b|_)" ); final public static Pattern TAXOMONY_CODE_PATTERN_A_LO = Pattern.compile( "_(" + TAX_CODE_LO diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index fe52a87..d6df5a9 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -5085,7 +5085,7 @@ public final class Test { .equals( "ECOLI" ) ) { return false; } - if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "blag_9YX45-blag", TAXONOMY_EXTRACTION.AGGRESSIVE ) + if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "blagg_9YX45-blag", TAXONOMY_EXTRACTION.AGGRESSIVE ) .equals( "9YX45" ) ) { return false; } @@ -13192,7 +13192,7 @@ public final class Test { return false; } final PhylogenyNode n3 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n3.toString() ); return false; @@ -13210,62 +13210,62 @@ public final class Test { return false; } final PhylogenyNode n6 = PhylogenyNode - .createInstanceFromNhxString( "BLAG-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n6.getNodeData().isHasTaxonomy() ) { System.out.println( n6.toString() ); return false; } final PhylogenyNode n7 = PhylogenyNode - .createInstanceFromNhxString( "BLAG-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BL-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n7.getNodeData().isHasTaxonomy() ) { System.out.println( n7.toString() ); return false; } final PhylogenyNode n8 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n8.toString() ); return false; } final PhylogenyNode n9 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n9.toString() ); return false; } final PhylogenyNode n10x = PhylogenyNode - .createInstanceFromNhxString( "BLAG_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG!_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n10x.getNodeData().isHasTaxonomy() ) { System.out.println( n10x.toString() ); return false; } final PhylogenyNode n10xx = PhylogenyNode - .createInstanceFromNhxString( "BLAG_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG!_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n10xx.getNodeData().isHasTaxonomy() ) { System.out.println( n10xx.toString() ); return false; } final PhylogenyNode n10 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAGG_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) { System.out.println( n10.toString() ); return false; } final PhylogenyNode n11 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + .createInstanceFromNhxString( "BLAG@_Mus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n11.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { System.out.println( n11.toString() ); return false; } final PhylogenyNode n12 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus_musculus", + .createInstanceFromNhxString( "BLA_G_Mus_musculus_musculus", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( !n12.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { System.out.println( n12.toString() ); return false; } final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "BLAG_Mus_musculus1", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + .createInstanceFromNhxString( "BLAaG_Mus_musculus1", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); if ( n13.getNodeData().isHasTaxonomy() ) { System.out.println( n13.toString() ); return false; -- 1.7.10.2