From 3e0de286efdce95f266e91b2ef5f974041156b19 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 16 Apr 2014 18:29:48 +0000 Subject: [PATCH] reordered --- .../src/org/forester/archaeopteryx/Constants.java | 2 +- .../org/forester/io/parsers/util/ParserUtils.java | 55 ++++++++++++-------- forester/java/src/org/forester/test/Test.java | 41 +++++++++++++++ .../src/org/forester/util/ForesterConstants.java | 2 +- 4 files changed, 76 insertions(+), 24 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java index 0ad0eb9..923c109 100644 --- a/forester/java/src/org/forester/archaeopteryx/Constants.java +++ b/forester/java/src/org/forester/archaeopteryx/Constants.java @@ -43,7 +43,7 @@ public final class Constants { public final static boolean ALLOW_DDBJ_BLAST = false; public final static String PRG_NAME = "Archaeopteryx"; final static String VERSION = "0.988 SR"; - final static String PRG_DATE = "140415"; + final static String PRG_DATE = "140416"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Arial", "Helvetica", "Verdana", "Tahoma", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" }; diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index d056ba3..9168c9d 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -55,13 +55,17 @@ import org.forester.util.ForesterUtil; public final class ParserUtils { - final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA"; final private static String SN_BN = "[A-Z][a-z]{2,30}[_ ][a-z]{3,30}"; + final public static String TAX_CODE = "(?:[A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA"; + final public static String TAX_CODE_LO = "(?:[A-Z]{5})|RAT|PIG|PEA"; final public static Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile( "(?:\\b|_)(" + TAX_CODE - + ")\\b" ); + + ")(?:\\b|_)" ); + final public static Pattern TAXOMONY_CODE_PATTERN_A_LO = Pattern.compile( "(?:\\b|_)(" + TAX_CODE_LO + + ")(?:\\b|_)" ); final public static Pattern TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" ); final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(" + TAX_CODE + ")\\b" ); + final public static Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile( "([A-Z][a-z]{2,30})" ); final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern.compile( "(?:\\b|_)(" + SN_BN + ")(?:(\\s*$)|([_ ][a-z]*[A-Z0-9]))" ); final public static Pattern TAXOMONY_SN_PATTERN_SNS = Pattern.compile( "(?:\\b|_)(" + SN_BN @@ -69,6 +73,8 @@ public final class ParserUtils { + ")[_ ][a-z]*[A-Z0-9]" ); final public static Pattern TAXOMONY_SN_PATTERN_SNS2 = Pattern.compile( "[A-Z0-9][a-z]*[_ ](" + SN_BN + "[_ ][a-z]{3,30}" + ")\\s*$" ); + final public static Pattern TAXOMONY_SN_PATTERN_SP = Pattern + .compile( "(?:\\b|_)([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" ); final public static Pattern TAXOMONY_SN_PATTERN_STRAIN_1 = Pattern .compile( "(?:\\b|_)(" + SN_BN @@ -81,9 +87,6 @@ public final class ParserUtils { .compile( "(?:\\b|_)(" + SN_BN + "[_ ]str[a-z]{0,3}\\.?[_ ]\\S{1,60}[_ ]substr[a-z]{0,3}\\.?[_ ]\\S{1,60})(?:\\b|_)" ); - final public static Pattern TAXOMONY_SN_PATTERN_SP = Pattern - .compile( "(?:\\b|_)([A-Z][a-z]{2,30}[_ ]sp\\.?)(?:\\b|_)?" ); - final public static Pattern TAXOMONY_SN_PATTERN_GENUS = Pattern.compile( "([A-Z][a-z]{2,30})" ); final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_(" + TAX_CODE + ")/\\d+-\\d+\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern @@ -327,7 +330,23 @@ public final class ParserUtils { return id; } else { - final String code = extractTaxonomyCodeFromNodeName( node.getName(), taxonomy_extraction ); + String code = null; + if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) { + code = extractTaxonomyCodeFromNodeNameLettersOnly( node.getName() ); + if ( ForesterUtil.isEmpty( code ) ) { + final String sn = extractScientificNameFromNodeName( node.getName() ); + if ( !ForesterUtil.isEmpty( sn ) ) { + if ( !node.getNodeData().isHasTaxonomy() ) { + node.getNodeData().setTaxonomy( new Taxonomy() ); + } + node.getNodeData().getTaxonomy().setScientificName( sn ); + return sn; + } + } + } + if ( ForesterUtil.isEmpty( code ) ) { + code = extractTaxonomyCodeFromNodeName( node.getName(), taxonomy_extraction ); + } if ( !ForesterUtil.isEmpty( code ) ) { if ( !node.getNodeData().isHasTaxonomy() ) { node.getNodeData().setTaxonomy( new Taxonomy() ); @@ -335,16 +354,6 @@ public final class ParserUtils { node.getNodeData().getTaxonomy().setTaxonomyCode( code ); return code; } - else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) { - final String sn = extractScientificNameFromNodeName( node.getName() ); - if ( !ForesterUtil.isEmpty( sn ) ) { - if ( !node.getNodeData().isHasTaxonomy() ) { - node.getNodeData().setTaxonomy( new Taxonomy() ); - } - node.getNodeData().getTaxonomy().setScientificName( sn ); - return sn; - } - } } return null; } @@ -361,12 +370,6 @@ public final class ParserUtils { if ( m.find() ) { return m.group( 1 ); } - //else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) { - // m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name ); - // if ( m.find() ) { - // return m.group( 1 ); - // } - //} } return null; } @@ -417,4 +420,12 @@ public final class ParserUtils { } return parser; } + + private final static String extractTaxonomyCodeFromNodeNameLettersOnly( final String name ) { + final Matcher m = TAXOMONY_CODE_PATTERN_A_LO.matcher( name ); + if ( m.find() ) { + return m.group( 1 ); + } + return null; + } } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index f57b0be..5ff446f 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -12218,6 +12218,47 @@ public final class Test { System.out.println( n21.toString() ); return false; } + final PhylogenyNode n22 = PhylogenyNode + .createInstanceFromNhxString( "NEMVE_Nematostella_vectensis", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n22.getNodeData().getTaxonomy().getTaxonomyCode().equals( "NEMVE" ) ) { + System.out.println( n22.toString() ); + return false; + } + final PhylogenyNode n23 = PhylogenyNode + .createInstanceFromNhxString( "9EMVE_Nematostella_vectensis", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n23.getNodeData().getTaxonomy().getScientificName().equals( "Nematostella vectensis" ) ) { + System.out.println( n23.toString() ); + return false; + } + final PhylogenyNode n24 = PhylogenyNode + .createInstanceFromNhxString( "9EMVE_Nematostella", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n24.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9EMVE" ) ) { + System.out.println( n24.toString() ); + return false; + } + // + final PhylogenyNode n25 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_vectensis_NEMVE", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n25.getNodeData().getTaxonomy().getTaxonomyCode().equals( "NEMVE" ) ) { + System.out.println( n25.toString() ); + return false; + } + final PhylogenyNode n26 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_vectensis_9EMVE", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n26.getNodeData().getTaxonomy().getScientificName().equals( "Nematostella vectensis" ) ) { + System.out.println( n26.toString() ); + return false; + } + final PhylogenyNode n27 = PhylogenyNode + .createInstanceFromNhxString( "Nematostella_9EMVE", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n27.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9EMVE" ) ) { + System.out.println( n27.toString() ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); diff --git a/forester/java/src/org/forester/util/ForesterConstants.java b/forester/java/src/org/forester/util/ForesterConstants.java index 3490f88..a98df7a 100644 --- a/forester/java/src/org/forester/util/ForesterConstants.java +++ b/forester/java/src/org/forester/util/ForesterConstants.java @@ -28,7 +28,7 @@ package org.forester.util; public final class ForesterConstants { public final static String FORESTER_VERSION = "1.032"; - public final static String FORESTER_DATE = "140415"; + public final static String FORESTER_DATE = "140416"; public final static String PHYLO_XML_VERSION = "1.10"; public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org"; public final static String PHYLO_XML_XSD = "phyloxml.xsd"; -- 1.7.10.2