X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Ftools%2FPhylogenyDecorator.java;h=57ef9f04a7acb3a10f76eb8abaff32ba904150b7;hb=41ea5973f93687513d29e5b7cad1abff8f3adb4b;hp=f4f6c1a61b094cbc0713c648d12154d387cee4cd;hpb=656be28debec520e0e35a8b311114398a40ea366;p=jalview.git diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java index f4f6c1a..57ef9f0 100644 --- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java +++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java @@ -29,9 +29,11 @@ import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; +import java.util.regex.Matcher; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Accession; @@ -215,11 +217,17 @@ public final class PhylogenyDecorator { if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) { throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" ); } + if ( map.isEmpty() ) { + throw new IllegalArgumentException( "map is empty" ); + } for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); String name = node.getName(); + String tilde_annotation = null; if ( trim_after_tilde && ( name.indexOf( '~' ) > 0 ) ) { - name = name.substring( 0, name.indexOf( '~' ) ); + final int ti = name.indexOf( '~' ); + tilde_annotation = name.substring( ti ); + name = name.substring( 0, ti ); } if ( !ForesterUtil.isEmpty( name ) ) { if ( intermediate_map != null ) { @@ -239,8 +247,17 @@ public final class PhylogenyDecorator { if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) { new_value = extractBracketedScientificNames( node, new_value ); } - else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) { - new_value = extractBracketedTaxCodes( node, new_value ); + else if ( extract_bracketed_tax_code ) { + if ( ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ).find() ) { + new_value = extractBracketedTaxCodes( node, new_value ); + } + else if ( ParserUtils.TAXOMONY_CODE_PATTERN_6.matcher( new_value ).find() ) { + new_value = extractBracketedTaxCodes6( node, new_value ); + } + else if ( picky ) { + throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value + + "\"" ); + } } switch ( field ) { case SEQUENCE_ANNOTATION_DESC: @@ -250,7 +267,7 @@ public final class PhylogenyDecorator { if ( !node.getNodeData().isHasSequence() ) { node.getNodeData().setSequence( new Sequence() ); } - final Annotation annotation = new Annotation( "?" ); + final Annotation annotation = new Annotation(); annotation.setDesc( new_value ); node.getNodeData().getSequence().addAnnotation( annotation ); break; @@ -279,6 +296,9 @@ public final class PhylogenyDecorator { node.getNodeData().getTaxonomy().setScientificName( new_value ); break; case SEQUENCE_NAME: + if ( trim_after_tilde ) { + new_value = addTildeAnnotation( tilde_annotation, new_value ); + } if ( PhylogenyDecorator.VERBOSE ) { System.out.println( name + ": " + new_value ); } @@ -312,6 +332,9 @@ public final class PhylogenyDecorator { if ( PhylogenyDecorator.SANITIZE ) { new_value = PhylogenyDecorator.sanitize( new_value ); } + if ( trim_after_tilde ) { + new_value = addTildeAnnotation( tilde_annotation, new_value ); + } if ( PhylogenyDecorator.VERBOSE ) { System.out.println( new_value ); } @@ -329,6 +352,13 @@ public final class PhylogenyDecorator { } } + private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) { + if ( ForesterUtil.isEmpty( tilde_annotation ) ) { + return new_value; + } + return new_value + tilde_annotation; + } + public static void decorate( final Phylogeny[] phylogenies, final Map> map, final boolean picky, @@ -400,7 +430,7 @@ public final class PhylogenyDecorator { throws IOException { final Map> map = new HashMap>(); BasicTable mapping_table = null; - mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false ); + mapping_table = BasicTableParser.parse( mapping_table_file, '\t', false, false ); for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) { final Map row_map = new HashMap(); String name = null; @@ -437,8 +467,11 @@ public final class PhylogenyDecorator { } private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) { - final int i = new_value.lastIndexOf( "[" ); - final String tc = new_value.substring( i + 1, new_value.length() - 1 ); + final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ); + String tc = "?"; + if ( m.find() ) { + tc = m.group( 1 ); + } ForesterUtil.ensurePresenceOfTaxonomy( node ); try { node.getNodeData().getTaxonomy().setTaxonomyCode( tc ); @@ -446,7 +479,31 @@ public final class PhylogenyDecorator { catch ( final PhyloXmlDataFormatException e ) { throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc ); } - return new_value.substring( 0, i - 1 ).trim(); + return new_value; //TODO //FIXME + } + + private static String extractBracketedTaxCodes6( final PhylogenyNode node, final String new_value ) { + final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_6.matcher( new_value ); + String tc = "?"; + if ( m.find() ) { + tc = m.group( 1 ); + } + ForesterUtil.ensurePresenceOfTaxonomy( node ); + try { + if ( tc.length() == 6 ) { + final String t = tc.substring( 0, 5 ); + System.out.println( "WARNING: taxonomy code " + tc + " -> " + t ); + tc = t; + } + else { + throw new IllegalArgumentException(); + } + node.getNodeData().getTaxonomy().setTaxonomyCode( tc ); + } + catch ( final PhyloXmlDataFormatException e ) { + throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc ); + } + return new_value; //TODO //FIXME } private static String extractIntermediate( final Map intermediate_map, final String name ) {