// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.tools;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
+import java.util.regex.Matcher;
import org.forester.io.parsers.nhx.NHXFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
}
+ if ( map.isEmpty() ) {
+ throw new IllegalArgumentException( "map is empty" );
+ }
for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
String name = node.getName();
+ String tilde_annotation = null;
if ( trim_after_tilde && ( name.indexOf( '~' ) > 0 ) ) {
- name = name.substring( 0, name.indexOf( '~' ) );
+ final int ti = name.indexOf( '~' );
+ tilde_annotation = name.substring( ti );
+ name = name.substring( 0, ti );
}
if ( !ForesterUtil.isEmpty( name ) ) {
if ( intermediate_map != null ) {
if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
new_value = extractBracketedScientificNames( node, new_value );
}
- else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) {
- new_value = extractBracketedTaxCodes( node, new_value );
+ else if ( extract_bracketed_tax_code ) {
+ if ( ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ).find() ) {
+ new_value = extractBracketedTaxCodes( node, new_value );
+ }
+ else if ( picky ) {
+ throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value
+ + "\"" );
+ }
}
switch ( field ) {
case SEQUENCE_ANNOTATION_DESC:
node.getNodeData().getTaxonomy().setScientificName( new_value );
break;
case SEQUENCE_NAME:
+ if ( trim_after_tilde ) {
+ new_value = addTildeAnnotation( tilde_annotation, new_value );
+ }
if ( PhylogenyDecorator.VERBOSE ) {
System.out.println( name + ": " + new_value );
}
if ( PhylogenyDecorator.SANITIZE ) {
new_value = PhylogenyDecorator.sanitize( new_value );
}
+ if ( trim_after_tilde ) {
+ new_value = addTildeAnnotation( tilde_annotation, new_value );
+ }
if ( PhylogenyDecorator.VERBOSE ) {
System.out.println( new_value );
}
}
}
+ /**
+  * Re-attaches a previously split-off tilde annotation to a value.
+  *
+  * @param tilde_annotation the text to append; when ForesterUtil.isEmpty
+  *                         reports it as empty, nothing is appended
+  * @param new_value the value the annotation is appended to
+  * @return new_value unchanged if tilde_annotation is empty, otherwise
+  *         new_value immediately followed by tilde_annotation
+  */
+ private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) {
+     // Single expression: only concatenate when there is an annotation to add.
+     return ForesterUtil.isEmpty( tilde_annotation ) ? new_value : new_value + tilde_annotation;
+ }
+
public static void decorate( final Phylogeny[] phylogenies,
final Map<String, Map<String, String>> map,
final boolean picky,
throws IOException {
final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
BasicTable<String> mapping_table = null;
- mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false );
+ mapping_table = BasicTableParser.parse( mapping_table_file, '\t', false, false );
for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
final Map<String, String> row_map = new HashMap<String, String>();
String name = null;
}
private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
- final int i = new_value.lastIndexOf( "[" );
- final String tc = new_value.substring( i + 1, new_value.length() - 1 );
+ final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value );
+ String tc = "?";
+ if ( m.find() ) {
+ tc = m.group( 1 );
+ }
ForesterUtil.ensurePresenceOfTaxonomy( node );
try {
node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
catch ( final PhyloXmlDataFormatException e ) {
throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
}
- return new_value.substring( 0, i - 1 ).trim();
+ return new_value; //TODO //FIXME
}
private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {