X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Ftools%2FPhylogenyDecorator.java;h=7e704448adc10d91d5e1c06eeaf613e37eae14e3;hb=877d1e4a2f3e2a937197dc57253ba2cead14a4d6;hp=3191201a05479e108b3f52461fb08b81cec4e898;hpb=c4cf3c01b3cbb89d98f29418444287ca8703a781;p=jalview.git diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java index 3191201..7e70444 100644 --- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java +++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java @@ -29,10 +29,8 @@ import java.io.File; import java.io.IOException; import java.util.HashMap; import java.util.Map; -import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.forester.archaeopteryx.AptxUtil; import org.forester.io.parsers.nhx.NHXFormatException; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; import org.forester.phylogeny.Phylogeny; @@ -93,63 +91,63 @@ public final class PhylogenyDecorator { } if ( new_values != null ) { if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) { - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) ); } if ( new_values.containsKey( TP_TAXONOMY_ID ) && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) { - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData() .getTaxonomy() .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ), new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) ); } else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) { - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy() .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) ); } if ( new_values.containsKey( TP_TAXONOMY_SN ) ) { - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) ); } if ( new_values.containsKey( TP_TAXONOMY_CN ) ) { - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) ); } if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) { - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) ); } if ( new_values.containsKey( TP_SEQ_ACCESSION ) && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) { - AptxUtil.ensurePresenceOfSequence( node ); + ForesterUtil.ensurePresenceOfSequence( node ); node.getNodeData() .getSequence() .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ), new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) ); } if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) { - AptxUtil.ensurePresenceOfSequence( node ); - final Annotation ann = new Annotation( "?" ); + ForesterUtil.ensurePresenceOfSequence( node ); + final Annotation ann = new Annotation(); ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) ); node.getNodeData().getSequence().addAnnotation( ann ); } if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) { - AptxUtil.ensurePresenceOfSequence( node ); + ForesterUtil.ensurePresenceOfSequence( node ); final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) ); node.getNodeData().getSequence().addAnnotation( ann ); } if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) { - AptxUtil.ensurePresenceOfSequence( node ); + ForesterUtil.ensurePresenceOfSequence( node ); node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) ); } if ( new_values.containsKey( TP_SEQ_NAME ) ) { - AptxUtil.ensurePresenceOfSequence( node ); + ForesterUtil.ensurePresenceOfSequence( node ); node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) ); } if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) { - AptxUtil.ensurePresenceOfSequence( node ); + ForesterUtil.ensurePresenceOfSequence( node ); node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) ); } if ( new_values.containsKey( TP_NODE_NAME ) ) { @@ -183,25 +181,25 @@ public final class PhylogenyDecorator { final Map map, final FIELD field, final boolean extract_bracketed_scientific_name, + final boolean extract_bracketed_tax_code, final boolean picky, final boolean cut_name_after_space, final boolean process_name_intelligently, final boolean process_similar_to, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map, - final boolean move_domain_numbers_at_end_to_middle, final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException { PhylogenyDecorator.decorate( phylogeny, map, field, extract_bracketed_scientific_name, + extract_bracketed_tax_code, picky, null, cut_name_after_space, process_name_intelligently, process_similar_to, numbers_of_chars_allowed_to_remove_if_not_found_in_map, - move_domain_numbers_at_end_to_middle, trim_after_tilde ); } @@ -224,13 +222,13 @@ public final class PhylogenyDecorator { final Map map, final FIELD field, final boolean extract_bracketed_scientific_name, + final boolean extract_bracketed_tax_code, final boolean picky, final Map intermediate_map, final boolean cut_name_after_space, final boolean process_name_intelligently, final boolean process_similar_to, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map, - final boolean move_domain_numbers_at_end_to_middle, final boolean trim_after_tilde ) throws IllegalArgumentException, PhyloXmlDataFormatException { if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) { @@ -278,6 +276,9 @@ public final class PhylogenyDecorator { if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) { new_value = extractBracketedScientificNames( node, new_value ); } + else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) { + new_value = extractBracketedTaxCodes( node, new_value ); + } switch ( field ) { case SEQUENCE_ANNOTATION_DESC: if ( PhylogenyDecorator.VERBOSE ) { @@ -304,14 +305,14 @@ public final class PhylogenyDecorator { if ( PhylogenyDecorator.VERBOSE ) { System.out.println( name + ": " + new_value ); } - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setTaxonomyCode( new_value ); break; case TAXONOMY_SCIENTIFIC_NAME: if ( PhylogenyDecorator.VERBOSE ) { System.out.println( name + ": " + new_value ); } - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setScientificName( new_value ); break; case SEQUENCE_NAME: @@ -356,9 +357,6 @@ public final class PhylogenyDecorator { default: throw new RuntimeException( "unknown field \"" + field + "\"" ); } - if ( move_domain_numbers_at_end_to_middle && ( field != FIELD.NODE_NAME ) ) { - node.setName( moveDomainNumbersAtEnd( node.getName() ) ); - } } } else if ( picky ) { @@ -373,11 +371,9 @@ public final class PhylogenyDecorator { final boolean picky, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map ) throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException { - for( int i = 0; i < phylogenies.length; ++i ) { - PhylogenyDecorator.decorate( phylogenies[ i ], - map, - picky, - numbers_of_chars_allowed_to_remove_if_not_found_in_map ); + for( final Phylogeny phylogenie : phylogenies ) { + PhylogenyDecorator + .decorate( phylogenie, map, picky, numbers_of_chars_allowed_to_remove_if_not_found_in_map ); } } @@ -385,25 +381,25 @@ public final class PhylogenyDecorator { final Map map, final FIELD field, final boolean extract_bracketed_scientific_name, + final boolean extract_bracketed_tax_code, final boolean picky, final boolean cut_name_after_space, final boolean process_name_intelligently, final boolean process_similar_to, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map, - final boolean move_domain_numbers_at_end_to_middle, final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException { - for( int i = 0; i < phylogenies.length; ++i ) { - PhylogenyDecorator.decorate( phylogenies[ i ], + for( final Phylogeny phylogenie : phylogenies ) { + PhylogenyDecorator.decorate( phylogenie, map, field, extract_bracketed_scientific_name, + extract_bracketed_tax_code, picky, cut_name_after_space, process_name_intelligently, process_similar_to, numbers_of_chars_allowed_to_remove_if_not_found_in_map, - move_domain_numbers_at_end_to_middle, trim_after_tilde ); } } @@ -412,27 +408,27 @@ public final class PhylogenyDecorator { final Map map, final FIELD field, final boolean extract_bracketed_scientific_name, + final boolean extract_bracketed_tax_code, final boolean picky, final Map intermediate_map, final boolean cut_name_after_space, final boolean process_name_intelligently, final boolean process_similar_to, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map, - final boolean move_domain_numbers_at_end_to_middle, final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException { - for( int i = 0; i < phylogenies.length; ++i ) { - PhylogenyDecorator.decorate( phylogenies[ i ], + for( final Phylogeny phylogenie : phylogenies ) { + PhylogenyDecorator.decorate( phylogenie, map, field, extract_bracketed_scientific_name, + extract_bracketed_tax_code, picky, intermediate_map, cut_name_after_space, process_name_intelligently, process_similar_to, numbers_of_chars_allowed_to_remove_if_not_found_in_map, - move_domain_numbers_at_end_to_middle, trim_after_tilde ); } } @@ -448,11 +444,24 @@ public final class PhylogenyDecorator { private static String extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) { final int i = new_value.lastIndexOf( "[" ); final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 ); - AptxUtil.ensurePresenceOfTaxonomy( node ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setScientificName( scientific_name ); return new_value.substring( 0, i - 1 ).trim(); } + private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) { + final int i = new_value.lastIndexOf( "[" ); + final String tc = new_value.substring( i + 1, new_value.length() - 1 ); + ForesterUtil.ensurePresenceOfTaxonomy( node ); + try { + node.getNodeData().getTaxonomy().setTaxonomyCode( tc ); + } + catch ( final PhyloXmlDataFormatException e ) { + throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc ); + } + return new_value.substring( 0, i - 1 ).trim(); + } + private static String extractIntermediate( final Map intermediate_map, final String name ) { String new_name = null; if ( PhylogenyDecorator.VERBOSE ) { @@ -473,19 +482,6 @@ public final class PhylogenyDecorator { return new_name; } - private static String moveDomainNumbersAtEnd( final String node_name ) { - final Matcher m = NODENAME_SEQNUMBER_TAXDOMAINNUMBER.matcher( node_name ); - if ( m.matches() ) { - final String seq_number = m.group( 1 ); - final String tax = m.group( 2 ); - final String domain_number = m.group( 3 ); - return seq_number + "_[" + domain_number + "]_" + tax; - } - else { - return node_name; - } - } - public static Map> parseMappingTable( final File mapping_table_file ) throws IOException { final Map> map = new HashMap>();