in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 8 Oct 2013 02:56:47 +0000 (02:56 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Tue, 8 Oct 2013 02:56:47 +0000 (02:56 +0000)
forester_applications/src/org/forester/applications/tax_code_cleaner.java

index d0ea276..94820ab 100644 (file)
@@ -31,6 +31,7 @@
 package org.forester.applications;
 
 import java.io.File;
+import java.util.regex.Pattern;
 
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
@@ -207,6 +208,14 @@ public class tax_code_cleaner {
                         t.setScientificName( "Xanthoria parietina 46-1" );
                         t.setTaxonomyCode( "" );
                     }
+                    else if ( tc.length() == 6 ) {
+                        Pattern p = Pattern
+                                .compile( "[A-Z9][A-Z]{2}[A-Z0-9]{2}\\d" );
+                        if ( p.matcher( tc ).matches() ) {
+                            t.setTaxonomyCode( tc.substring( 0, 5 ) );
+                        }
+                    }
+                   
                 }
             }
         }