From 1485bd5a3acd0d3ba0f33192f04f8596d619c20f Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 8 Oct 2013 02:56:47 +0000 Subject: [PATCH] in progress --- .../src/org/forester/applications/tax_code_cleaner.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/forester_applications/src/org/forester/applications/tax_code_cleaner.java b/forester_applications/src/org/forester/applications/tax_code_cleaner.java index d0ea276..94820ab 100644 --- a/forester_applications/src/org/forester/applications/tax_code_cleaner.java +++ b/forester_applications/src/org/forester/applications/tax_code_cleaner.java @@ -31,6 +31,7 @@ package org.forester.applications; import java.io.File; +import java.util.regex.Pattern; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlParser; @@ -207,6 +208,14 @@ public class tax_code_cleaner { t.setScientificName( "Xanthoria parietina 46-1" ); t.setTaxonomyCode( "" ); } + else if ( tc.length() == 6 ) { + Pattern p = Pattern + .compile( "[A-Z9][A-Z]{2}[A-Z0-9]{2}\\d" ); + if ( p.matcher( tc ).matches() ) { + t.setTaxonomyCode( tc.substring( 0, 5 ) ); + } + } + } } } -- 1.7.10.2