X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Ftools%2FPhylogenyDecorator.java;h=9d865e1ef6630955b9b1f286aea650b7c8b389ff;hb=01d681ced8f186561a3dd76715d950bd0eabd82d;hp=f9bcd1f68897917be3525fd408324aaa3f90b5d8;hpb=c7c4e34d403f220262b490dd3cbe5d300f114a18;p=jalview.git diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java index f9bcd1f..9d865e1 100644 --- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java +++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.tools; @@ -29,9 +29,11 @@ import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.regex.Matcher; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; @@ -215,11 +217,17 @@ public final class PhylogenyDecorator { if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) { throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" ); } + if ( map.isEmpty() ) { + throw new IllegalArgumentException( "map is empty" ); + } for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); String name = node.getName(); + String tilde_annotation = null; if ( trim_after_tilde && ( name.indexOf( '~' ) > 0 ) ) { - name = name.substring( 0, name.indexOf( '~' ) ); + final int ti = name.indexOf( '~' ); + tilde_annotation = name.substring( ti ); + name = name.substring( 0, ti ); } if ( !ForesterUtil.isEmpty( name ) ) { if ( intermediate_map != null ) { @@ -239,8 +247,14 @@ public final class PhylogenyDecorator { if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) { new_value = extractBracketedScientificNames( node, new_value ); } - else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) { - new_value = extractBracketedTaxCodes( node, new_value ); + else if ( extract_bracketed_tax_code ) { + if ( ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ).find() ) { + new_value = extractBracketedTaxCodes( node, new_value ); + } + else if ( picky ) { + throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value + + "\"" ); + } } switch ( field ) { case SEQUENCE_ANNOTATION_DESC: @@ -279,6 +293,9 @@ public final class PhylogenyDecorator { node.getNodeData().getTaxonomy().setScientificName( new_value ); break; case SEQUENCE_NAME: + if ( trim_after_tilde ) { + new_value = addTildeAnnotation( tilde_annotation, new_value ); + } if ( PhylogenyDecorator.VERBOSE ) { System.out.println( name + ": " + new_value ); } @@ -312,6 +329,9 @@ public final class PhylogenyDecorator { if ( PhylogenyDecorator.SANITIZE ) { new_value = PhylogenyDecorator.sanitize( new_value ); } + if ( trim_after_tilde ) { + new_value = addTildeAnnotation( tilde_annotation, new_value ); + } if ( PhylogenyDecorator.VERBOSE ) { System.out.println( new_value ); } @@ -329,6 +349,13 @@ public final class PhylogenyDecorator { } } + private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) { + if ( ForesterUtil.isEmpty( tilde_annotation ) ) { + return new_value; + } + return new_value + tilde_annotation; + } + public static void decorate( final Phylogeny[] phylogenies, final Map> map, final boolean picky, @@ -400,7 +427,7 @@ public final class PhylogenyDecorator { throws IOException { final Map> map = new HashMap>(); BasicTable mapping_table = null; - mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false ); + mapping_table = BasicTableParser.parse( mapping_table_file, '\t', false, false ); for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) { final Map row_map = new HashMap(); String name = null; @@ -437,8 +464,11 @@ public final class PhylogenyDecorator { } private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) { - final int i = new_value.lastIndexOf( "[" ); - final String tc = new_value.substring( i + 1, new_value.length() - 1 ); + final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ); + String tc = "?"; + if ( m.find() ) { + tc = m.group( 1 ); + } ForesterUtil.ensurePresenceOfTaxonomy( node ); try { node.getNodeData().getTaxonomy().setTaxonomyCode( tc ); @@ -446,7 +476,7 @@ public final class PhylogenyDecorator { catch ( final PhyloXmlDataFormatException e ) { throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc ); } - return new_value.substring( 0, i - 1 ).trim(); + return new_value; //TODO //FIXME } private static String extractIntermediate( final Map intermediate_map, final String name ) {