From 9f15c8c3415681ea197bf9d629ee3f2c126fc5b7 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Thu, 21 Mar 2013 17:14:40 +0000 Subject: [PATCH] inprogress --- .../src/org/forester/io/parsers/util/ParserUtils.java | 2 ++ .../java/src/org/forester/tools/PhylogenyDecorator.java | 15 +++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index bbac039..4d74229 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -65,6 +65,8 @@ public final class ParserUtils { .compile( "_([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)_" ); final private static Pattern TAXOMONY_CODE_PATTERN_PF = Pattern .compile( "([A-Z9][A-Z]{2}[A-Z0-9]{2}|RAT|PIG|PEA|CAP)/\\d+-\\d+" ); + final public static Pattern TAXOMONY_CODE_PATTERN_4 = Pattern + .compile( "\\[(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA|CAP)\\]" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_1 = Pattern.compile( "\\b\\d{1,7}\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_2 = Pattern.compile( "(\\d{1,7})[^0-9A-Za-z].*" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PF = Pattern.compile( "(\\d{1,7})/\\d+-\\d+" ); diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java index b6bc705..f1d6e24 100644 --- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java +++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java @@ -29,9 +29,11 @@ import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.regex.Matcher; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; @@ -242,7 +244,8 @@ public final class PhylogenyDecorator { if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) { new_value = extractBracketedScientificNames( node, new_value ); } - else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) { + else if ( extract_bracketed_tax_code + && ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ).matches() ) { new_value = extractBracketedTaxCodes( node, new_value ); } switch ( field ) { @@ -453,10 +456,10 @@ public final class PhylogenyDecorator { } private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) { - final int i = new_value.lastIndexOf( "[" ); - String tc = new_value.substring( i + 1, new_value.length() - 1 ); - if ( tc.length() == 6 ) { - tc = tc.substring( 0, 5 ); + final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ); + String tc = null; + if ( m.matches() ) { + tc = m.group( 1 ); } ForesterUtil.ensurePresenceOfTaxonomy( node ); try { @@ -465,7 +468,7 @@ public final class PhylogenyDecorator { catch ( final PhyloXmlDataFormatException e ) { throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc ); } - return new_value.substring( 0, i - 1 ).trim(); + return new_value; //TODO //FIXME } private static String extractIntermediate( final Map intermediate_map, final String name ) { -- 1.7.10.2