import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.forester.archaeopteryx.AptxUtil;
import org.forester.io.parsers.nhx.NHXFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.phylogeny.Phylogeny;
}
if ( new_values != null ) {
if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
}
if ( new_values.containsKey( TP_TAXONOMY_ID )
&& new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData()
.getTaxonomy()
.setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
}
else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy()
.setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
}
if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
}
if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
}
if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
}
if ( new_values.containsKey( TP_SEQ_ACCESSION )
&& new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
+ ForesterUtil.ensurePresenceOfSequence( node );
node.getNodeData()
.getSequence()
.setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
}
if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
- final Annotation ann = new Annotation( "?" );
+ ForesterUtil.ensurePresenceOfSequence( node );
+ final Annotation ann = new Annotation();
ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
node.getNodeData().getSequence().addAnnotation( ann );
}
if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
+ ForesterUtil.ensurePresenceOfSequence( node );
final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
node.getNodeData().getSequence().addAnnotation( ann );
}
if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
+ ForesterUtil.ensurePresenceOfSequence( node );
node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
}
if ( new_values.containsKey( TP_SEQ_NAME ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
+ ForesterUtil.ensurePresenceOfSequence( node );
node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
}
if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
- AptxUtil.ensurePresenceOfSequence( node );
+ ForesterUtil.ensurePresenceOfSequence( node );
node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
}
if ( new_values.containsKey( TP_NODE_NAME ) ) {
final Map<String, String> map,
final FIELD field,
final boolean extract_bracketed_scientific_name,
+ final boolean extract_bracketed_tax_code,
final boolean picky,
final boolean cut_name_after_space,
final boolean process_name_intelligently,
final boolean process_similar_to,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- final boolean move_domain_numbers_at_end_to_middle,
final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
PhyloXmlDataFormatException {
PhylogenyDecorator.decorate( phylogeny,
map,
field,
extract_bracketed_scientific_name,
+ extract_bracketed_tax_code,
picky,
null,
cut_name_after_space,
process_name_intelligently,
process_similar_to,
numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- move_domain_numbers_at_end_to_middle,
trim_after_tilde );
}
final Map<String, String> map,
final FIELD field,
final boolean extract_bracketed_scientific_name,
+ final boolean extract_bracketed_tax_code,
final boolean picky,
final Map<String, String> intermediate_map,
final boolean cut_name_after_space,
final boolean process_name_intelligently,
final boolean process_similar_to,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- final boolean move_domain_numbers_at_end_to_middle,
final boolean trim_after_tilde ) throws IllegalArgumentException,
PhyloXmlDataFormatException {
if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
new_value = extractBracketedScientificNames( node, new_value );
}
+ else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) {
+ new_value = extractBracketedTaxCodes( node, new_value );
+ }
switch ( field ) {
case SEQUENCE_ANNOTATION_DESC:
if ( PhylogenyDecorator.VERBOSE ) {
if ( PhylogenyDecorator.VERBOSE ) {
System.out.println( name + ": " + new_value );
}
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
break;
case TAXONOMY_SCIENTIFIC_NAME:
if ( PhylogenyDecorator.VERBOSE ) {
System.out.println( name + ": " + new_value );
}
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy().setScientificName( new_value );
break;
case SEQUENCE_NAME:
default:
throw new RuntimeException( "unknown field \"" + field + "\"" );
}
- if ( move_domain_numbers_at_end_to_middle && ( field != FIELD.NODE_NAME ) ) {
- node.setName( moveDomainNumbersAtEnd( node.getName() ) );
- }
}
}
else if ( picky ) {
final boolean picky,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
- for( int i = 0; i < phylogenies.length; ++i ) {
- PhylogenyDecorator.decorate( phylogenies[ i ],
- map,
- picky,
- numbers_of_chars_allowed_to_remove_if_not_found_in_map );
+ for( final Phylogeny phylogenie : phylogenies ) {
+ PhylogenyDecorator
+ .decorate( phylogenie, map, picky, numbers_of_chars_allowed_to_remove_if_not_found_in_map );
}
}
final Map<String, String> map,
final FIELD field,
final boolean extract_bracketed_scientific_name,
+ final boolean extract_bracketed_tax_code,
final boolean picky,
final boolean cut_name_after_space,
final boolean process_name_intelligently,
final boolean process_similar_to,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- final boolean move_domain_numbers_at_end_to_middle,
final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
PhyloXmlDataFormatException {
- for( int i = 0; i < phylogenies.length; ++i ) {
- PhylogenyDecorator.decorate( phylogenies[ i ],
+ for( final Phylogeny phylogenie : phylogenies ) {
+ PhylogenyDecorator.decorate( phylogenie,
map,
field,
extract_bracketed_scientific_name,
+ extract_bracketed_tax_code,
picky,
cut_name_after_space,
process_name_intelligently,
process_similar_to,
numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- move_domain_numbers_at_end_to_middle,
trim_after_tilde );
}
}
final Map<String, String> map,
final FIELD field,
final boolean extract_bracketed_scientific_name,
+ final boolean extract_bracketed_tax_code,
final boolean picky,
final Map<String, String> intermediate_map,
final boolean cut_name_after_space,
final boolean process_name_intelligently,
final boolean process_similar_to,
final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- final boolean move_domain_numbers_at_end_to_middle,
final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
PhyloXmlDataFormatException {
- for( int i = 0; i < phylogenies.length; ++i ) {
- PhylogenyDecorator.decorate( phylogenies[ i ],
+ for( final Phylogeny phylogenie : phylogenies ) {
+ PhylogenyDecorator.decorate( phylogenie,
map,
field,
extract_bracketed_scientific_name,
+ extract_bracketed_tax_code,
picky,
intermediate_map,
cut_name_after_space,
process_name_intelligently,
process_similar_to,
numbers_of_chars_allowed_to_remove_if_not_found_in_map,
- move_domain_numbers_at_end_to_middle,
trim_after_tilde );
}
}
+ /**
+  * Splits a value of the form "name [scientific name]" into its two parts.
+  * The text between the last "[" and the final character of new_value is
+  * stored as the scientific name of the node's taxonomy (a taxonomy is
+  * created first if the node has none); the text in front of the bracket
+  * is returned.
+  *
+  * The visible caller only invokes this when new_value ends with "]".
+  *
+  * NOTE(review): the returned prefix ends at index i - 1, so the character
+  * directly before "[" is dropped (presumably a separating space -- confirm).
+  * If new_value contains no "[" or starts with "[", i - 1 is negative and
+  * String.substring throws StringIndexOutOfBoundsException.
+  *
+  * @param node the node whose taxonomy receives the scientific name
+  * @param new_value the raw value, expected to end in "[...]"
+  * @return the part of new_value preceding the bracket, trimmed
+  */
private static String extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
final int i = new_value.lastIndexOf( "[" );
final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
- AptxUtil.ensurePresenceOfTaxonomy( node );
+ ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy().setScientificName( scientific_name );
return new_value.substring( 0, i - 1 ).trim();
}
+    /**
+     * Splits a value of the form "name [CODE]" into its two parts. The text
+     * between the last "[" and the final character of new_value is stored as
+     * the taxonomy code of the node's taxonomy (a taxonomy is created first
+     * if the node has none); the text in front of the bracket is returned.
+     *
+     * The caller is expected to pass a value that ends with "]".
+     *
+     * @param node the node whose taxonomy receives the code
+     * @param new_value the raw value, expected to end in "[CODE]"
+     * @return the part of new_value preceding the bracket, trimmed; the empty
+     *         string if new_value starts with "["; new_value itself, untouched,
+     *         if it contains no "[" at all
+     * @throws IllegalArgumentException if the bracketed text is rejected as a
+     *         taxonomy code; the underlying PhyloXmlDataFormatException is
+     *         attached as the cause
+     */
+    private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
+        final int i = new_value.lastIndexOf( "[" );
+        if ( i < 0 ) {
+            // No opening bracket, so there is nothing to extract. Without this
+            // guard substring( 0, i - 1 ) below is called with a negative end
+            // index and throws StringIndexOutOfBoundsException.
+            return new_value;
+        }
+        final String tc = new_value.substring( i + 1, new_value.length() - 1 );
+        ForesterUtil.ensurePresenceOfTaxonomy( node );
+        try {
+            node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
+        }
+        catch ( final PhyloXmlDataFormatException e ) {
+            // Chain the original exception so its message and stack trace survive.
+            throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc, e );
+        }
+        if ( i == 0 ) {
+            // The value consists of the bracket only; substring( 0, -1 ) would throw.
+            return "";
+        }
+        // The character directly before "[" is dropped on purpose, matching
+        // extractBracketedScientificNames.
+        return new_value.substring( 0, i - 1 ).trim();
+    }
+
private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
String new_name = null;
if ( PhylogenyDecorator.VERBOSE ) {
return new_name;
}
- private static String moveDomainNumbersAtEnd( final String node_name ) {
- final Matcher m = NODENAME_SEQNUMBER_TAXDOMAINNUMBER.matcher( node_name );
- if ( m.matches() ) {
- final String seq_number = m.group( 1 );
- final String tax = m.group( 2 );
- final String domain_number = m.group( 3 );
- return seq_number + "_[" + domain_number + "]_" + tax;
- }
- else {
- return node_name;
- }
- }
-
public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
throws IOException {
final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();