inprogress
[jalview.git] / forester / java / src / org / forester / tools / PhylogenyDecorator.java
index f9bcd1f..9d865e1 100644 (file)
@@ -21,7 +21,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.tools;
 
@@ -29,9 +29,11 @@ import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.regex.Matcher;
 
 import org.forester.io.parsers.nhx.NHXFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.util.ParserUtils;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyNode;
 import org.forester.phylogeny.data.Accession;
@@ -215,11 +217,17 @@ public final class PhylogenyDecorator {
         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
             throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
         }
+        if ( map.isEmpty() ) {
+            throw new IllegalArgumentException( "map is empty" );
+        }
         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
             final PhylogenyNode node = iter.next();
             String name = node.getName();
+            String tilde_annotation = null;
             if ( trim_after_tilde && ( name.indexOf( '~' ) > 0 ) ) {
-                name = name.substring( 0, name.indexOf( '~' ) );
+                final int ti = name.indexOf( '~' );
+                tilde_annotation = name.substring( ti );
+                name = name.substring( 0, ti );
             }
             if ( !ForesterUtil.isEmpty( name ) ) {
                 if ( intermediate_map != null ) {
@@ -239,8 +247,14 @@ public final class PhylogenyDecorator {
                         if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
                             new_value = extractBracketedScientificNames( node, new_value );
                         }
-                        else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) {
-                            new_value = extractBracketedTaxCodes( node, new_value );
+                        else if ( extract_bracketed_tax_code ) {
+                            if ( ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ).find() ) {
+                                new_value = extractBracketedTaxCodes( node, new_value );
+                            }
+                            else if ( picky ) {
+                                throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value
+                                        + "\"" );
+                            }
                         }
                         switch ( field ) {
                             case SEQUENCE_ANNOTATION_DESC:
@@ -279,6 +293,9 @@ public final class PhylogenyDecorator {
                                 node.getNodeData().getTaxonomy().setScientificName( new_value );
                                 break;
                             case SEQUENCE_NAME:
+                                if ( trim_after_tilde ) {
+                                    new_value = addTildeAnnotation( tilde_annotation, new_value );
+                                }
                                 if ( PhylogenyDecorator.VERBOSE ) {
                                     System.out.println( name + ": " + new_value );
                                 }
@@ -312,6 +329,9 @@ public final class PhylogenyDecorator {
                                 if ( PhylogenyDecorator.SANITIZE ) {
                                     new_value = PhylogenyDecorator.sanitize( new_value );
                                 }
+                                if ( trim_after_tilde ) {
+                                    new_value = addTildeAnnotation( tilde_annotation, new_value );
+                                }
                                 if ( PhylogenyDecorator.VERBOSE ) {
                                     System.out.println( new_value );
                                 }
@@ -329,6 +349,13 @@ public final class PhylogenyDecorator {
         }
     }
 
+    private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) {
+        if ( ForesterUtil.isEmpty( tilde_annotation ) ) {
+            return new_value;
+        }
+        return new_value + tilde_annotation;
+    }
+
     public static void decorate( final Phylogeny[] phylogenies,
                                  final Map<String, Map<String, String>> map,
                                  final boolean picky,
@@ -400,7 +427,7 @@ public final class PhylogenyDecorator {
             throws IOException {
         final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
         BasicTable<String> mapping_table = null;
-        mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false );
+        mapping_table = BasicTableParser.parse( mapping_table_file, '\t', false, false );
         for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
             final Map<String, String> row_map = new HashMap<String, String>();
             String name = null;
@@ -437,8 +464,11 @@ public final class PhylogenyDecorator {
     }
 
     private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
-        final int i = new_value.lastIndexOf( "[" );
-        final String tc = new_value.substring( i + 1, new_value.length() - 1 );
+        final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value );
+        String tc = "?";
+        if ( m.find() ) {
+            tc = m.group( 1 );
+        }
         ForesterUtil.ensurePresenceOfTaxonomy( node );
         try {
             node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
@@ -446,7 +476,7 @@ public final class PhylogenyDecorator {
         catch ( final PhyloXmlDataFormatException e ) {
             throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
         }
-        return new_value.substring( 0, i - 1 ).trim();
+        return new_value; //TODO //FIXME
     }
 
     private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {