From: cmzmasek@gmail.com Date: Wed, 13 Nov 2013 21:23:54 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=c11943c6b43fd769d37936743f181bf28f34912d;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/application/decorator.java b/forester/java/src/org/forester/application/decorator.java index c318c76..925ba9e 100644 --- a/forester/java/src/org/forester/application/decorator.java +++ b/forester/java/src/org/forester/application/decorator.java @@ -26,13 +26,16 @@ package org.forester.application; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import org.forester.io.parsers.FastaParser; import org.forester.io.parsers.PhylogenyParser; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; @@ -42,6 +45,7 @@ import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY; import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.sequence.Sequence; import org.forester.tools.PhylogenyDecorator; import org.forester.tools.PhylogenyDecorator.FIELD; import org.forester.util.BasicTable; @@ -52,6 +56,7 @@ import org.forester.util.ForesterUtil; public final class decorator { private static final String SEQUENCE_NAME_FIELD = "s"; + private static final String MOL_SEQ = "m"; private static final String TAXONOMY_CODE_FIELD = "c"; private static final String TAXONOMY_SCIENTIFIC_NAME_FIELD = "sn"; private static final String DS_FILED = "d"; @@ -60,6 +65,7 @@ public final class decorator { final static private String PICKY_OPTION = "p"; final static private String FIELD_OPTION = "f"; final static private String TRIM_AFTER_TILDE_OPTION = "t"; + final static private String VERBOSE_OPTION = "ve"; final static private String TREE_NAME_OPTION = "pn"; final static private String TREE_ID_OPTION = "pi"; final static private String TREE_DESC_OPTION = "pd"; @@ -77,8 +83,8 @@ public final class decorator { final static private String MAPPING_FILE_SEPARATOR_OPTION = "s"; final static private char MAPPING_FILE_SEPARATOR_DEFAULT = '\t'; final static private String PRG_NAME = "decorator"; - final static private String PRG_VERSION = "1.14"; - final static private String PRG_DATE = "130426"; + final static private String PRG_VERSION = "1.16"; + final static private String PRG_DATE = "131113"; public static void main( final String args[] ) { ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE ); @@ -120,6 +126,7 @@ public final class decorator { allowed_options.add( decorator.TRIM_AFTER_TILDE_OPTION ); allowed_options.add( decorator.ORDER_TREE_OPTION ); allowed_options.add( decorator.MIDPOINT_ROOT_OPTION ); + allowed_options.add( decorator.VERBOSE_OPTION ); final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options ); if ( dissallowed_options.length() > 0 ) { ForesterUtil.fatalError( decorator.PRG_NAME, "unknown option(s): " + dissallowed_options ); @@ -154,6 +161,7 @@ public final class decorator { boolean trim_after_tilde = false; boolean order_tree = false; boolean midpoint_root = false; + boolean verbose = false; String tree_name = ""; String tree_id = ""; String tree_desc = ""; @@ -225,6 +233,9 @@ public final class decorator { if ( cla.isOptionSet( decorator.ORDER_TREE_OPTION ) ) { order_tree = true; } + if ( cla.isOptionSet( decorator.VERBOSE_OPTION ) ) { + verbose = true; + } if ( cla.isOptionSet( decorator.FIELD_OPTION ) ) { field_str = cla.getOptionValue( decorator.FIELD_OPTION ); if ( field_str.equals( NODE_NAME_FIELD ) ) { @@ -244,6 +255,9 @@ public final class decorator { else if ( field_str.equals( SEQUENCE_NAME_FIELD ) ) { field = FIELD.SEQUENCE_NAME; } + else if ( field_str.equals( MOL_SEQ ) ) { + field = FIELD.MOL_SEQ; + } else if ( field_str.equals( TAXONOMY_SCIENTIFIC_NAME_FIELD ) ) { field = FIELD.TAXONOMY_SCIENTIFIC_NAME; extract_bracketed_scientific_name = false; @@ -291,34 +305,41 @@ public final class decorator { } Map map = null; if ( !advanced_table ) { - BasicTable mapping_table = null; - try { - mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false ); - } - catch ( final Exception e ) { - ForesterUtil.fatalError( decorator.PRG_NAME, - "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" ); - } - if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" ); - } - if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" ); - } - if ( mapping_table.isEmpty() || ( mapping_table.getNumberOfColumns() < 1 ) ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table is empty" ); - } - if ( mapping_table.getNumberOfColumns() == 1 ) { - ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table has only one column" ); + if ( field != FIELD.MOL_SEQ ) { + BasicTable mapping_table = null; + try { + mapping_table = BasicTableParser.parse( mapping_infile, separator, true, false ); + } + catch ( final Exception e ) { + ForesterUtil.fatalError( decorator.PRG_NAME, + "failed to read [" + mapping_infile + "] [" + e.getMessage() + "]" ); + } + if ( ( key_column < 0 ) || ( key_column >= mapping_table.getNumberOfColumns() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for key column" ); + } + if ( ( value_column < 0 ) || ( value_column >= mapping_table.getNumberOfColumns() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "illegal value for value column" ); + } + if ( mapping_table.isEmpty() || ( mapping_table.getNumberOfColumns() < 1 ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table is empty" ); + } + if ( mapping_table.getNumberOfColumns() == 1 ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "mapping table has only one column" ); + } + map = mapping_table.getColumnsAsMap( key_column, value_column ); + if ( verbose ) { + final Iterator> iter = map.entrySet().iterator(); + System.out.println(); + while ( iter.hasNext() ) { + final Entry e = iter.next(); + System.out.println( e.getKey() + " => " + e.getValue() ); + } + System.out.println(); + } } - map = mapping_table.getColumnsAsMap( key_column, value_column ); - final Iterator> iter = map.entrySet().iterator(); - System.out.println(); - while ( iter.hasNext() ) { - final Entry e = iter.next(); - System.out.println( e.getKey() + " => " + e.getValue() ); + else { + map = readFastaFileIntoMap( mapping_infile, verbose ); } - System.out.println(); } if ( !ForesterUtil.isEmpty( tree_name ) || !ForesterUtil.isEmpty( tree_id ) || !ForesterUtil.isEmpty( tree_desc ) ) { @@ -366,7 +387,8 @@ public final class decorator { process_name_intelligently, process_similar_to, numbers_of_chars_allowed_to_remove_if_not_found_in_map, - trim_after_tilde ); + trim_after_tilde, + verbose ); } } catch ( final NullPointerException e ) { @@ -397,10 +419,44 @@ public final class decorator { ForesterUtil.programMessage( PRG_NAME, "OK." ); } + private static Map readFastaFileIntoMap( final File mapping_infile, final boolean verbose ) { + List seqs = null; + try { + seqs = FastaParser.parse( new FileInputStream( mapping_infile ) ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "failed to read fasta-file from [" + mapping_infile + "] [" + + e.getMessage() + "]" ); + } + if ( ForesterUtil.isEmpty( seqs ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile + + "] is devoid of fasta-formatted sequences" ); + } + final Map map = new HashMap(); + for( final Sequence seq : seqs ) { + if ( ForesterUtil.isEmpty( seq.getIdentifier() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile + + "] contains sequence with empty identifier" ); + } + if ( map.containsKey( seq.getIdentifier() ) ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "sequence identifier [" + seq.getIdentifier() + + "] is not unique" ); + } + if ( seq.getLength() < 1 ) { + ForesterUtil.fatalError( decorator.PRG_NAME, "sequence [" + seq.getIdentifier() + "] is empty" ); + } + map.put( seq.getIdentifier(), seq.getMolecularSequenceAsString() ); + if ( verbose ) { + System.out.println( seq.getIdentifier() + " => " + seq.getMolecularSequenceAsString() ); + } + } + return map; + } + private static void argumentsError() { System.out.println(); System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f= " - + "[mapping table file] " ); + + "[mapping table file|fasta-file] " ); System.out.println(); System.out.println( "options:" ); System.out.println(); @@ -423,6 +479,7 @@ public final class decorator { System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD + ": taxonomy scientific name" ); System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" ); + System.out.println( " " + MOL_SEQ + " : molecular sequence" ); System.out.println( " -k= : key column in mapping table (0 based)," ); System.out.println( " names of the node to be decorated - default is 0" ); System.out.println( " -v= : value column in mapping table (0 based)," ); @@ -439,8 +496,9 @@ public final class decorator { System.out.println( " -c : cut name after first space (only for -f=n)" ); System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION + " : trim node name to be replaced after tilde" ); - System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + " : to midpoint-root the tree" ); - System.out.println( " -" + decorator.ORDER_TREE_OPTION + " : to order tree branches" ); + System.out.println( " -" + decorator.MIDPOINT_ROOT_OPTION + " : to midpoint-root the tree" ); + System.out.println( " -" + decorator.ORDER_TREE_OPTION + " : to order tree branches" ); + System.out.println( " -" + decorator.VERBOSE_OPTION + " : verbose" ); System.out.println(); System.exit( -1 ); } diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index faa0918..7931248 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -63,9 +63,7 @@ public final class ParserUtils { final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(" + TAX_CODE + ")\\b" ); final public static Pattern TAXOMONY_CODE_PATTERN_A = Pattern.compile( "(?:\\b|_)(" + TAX_CODE + ")\\b" ); - final public static Pattern TAXOMONY_CODE_PATTERN_4 = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" ); - final public static Pattern TAXOMONY_CODE_PATTERN_6 = Pattern - .compile( "\\[([A-Z9][A-Z]{2}[A-Z0-9]{3})\\]" ); + final public static Pattern TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A = Pattern.compile( "(?:\\b|_)(\\d{1,7})\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" ); diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java index 57ef9f0..27a5b03 100644 --- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java +++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java @@ -48,23 +48,22 @@ import org.forester.util.ForesterUtil; public final class PhylogenyDecorator { - // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb: - final private static String TP_TAXONOMY_CODE = "TAXONOMY_CODE"; - final private static String TP_TAXONOMY_ID = "TAXONOMY_ID"; - final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER"; - final private static String TP_TAXONOMY_SN = "TAXONOMY_SN"; - final private static String TP_TAXONOMY_CN = "TAXONOMY_CN"; - final private static String TP_TAXONOMY_SYN = "TAXONOMY_SYN"; - final private static String TP_SEQ_SYMBOL = "SEQ_SYMBOL"; + public final static boolean SANITIZE = false; + final private static String TP_NODE_NAME = "NODE_NAME"; final private static String TP_SEQ_ACCESSION = "SEQ_ACCESSION"; final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE"; final private static String TP_SEQ_ANNOTATION_DESC = "SEQ_ANNOTATION_DESC"; final private static String TP_SEQ_ANNOTATION_REF = "SEQ_ANNOTATION_REF"; final private static String TP_SEQ_MOL_SEQ = "SEQ_MOL_SEQ"; final private static String TP_SEQ_NAME = "SEQ_NAME"; - final private static String TP_NODE_NAME = "NODE_NAME"; - public final static boolean SANITIZE = false; - public final static boolean VERBOSE = true; + final private static String TP_SEQ_SYMBOL = "SEQ_SYMBOL"; + final private static String TP_TAXONOMY_CN = "TAXONOMY_CN"; + // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb: + final private static String TP_TAXONOMY_CODE = "TAXONOMY_CODE"; + final private static String TP_TAXONOMY_ID = "TAXONOMY_ID"; + final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER"; + final private static String TP_TAXONOMY_SN = "TAXONOMY_SN"; + final private static String TP_TAXONOMY_SYN = "TAXONOMY_SYN"; private PhylogenyDecorator() { // Not needed. @@ -170,7 +169,8 @@ public final class PhylogenyDecorator { final boolean process_name_intelligently, final boolean process_similar_to, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map, - final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException, + final boolean trim_after_tilde, + final boolean verbose ) throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException { PhylogenyDecorator.decorate( phylogeny, map, @@ -183,7 +183,8 @@ public final class PhylogenyDecorator { process_name_intelligently, process_similar_to, numbers_of_chars_allowed_to_remove_if_not_found_in_map, - trim_after_tilde ); + trim_after_tilde, + verbose ); } /** @@ -212,8 +213,8 @@ public final class PhylogenyDecorator { final boolean process_name_intelligently, final boolean process_similar_to, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map, - final boolean trim_after_tilde ) throws IllegalArgumentException, - PhyloXmlDataFormatException { + final boolean trim_after_tilde, + final boolean verbose ) throws IllegalArgumentException, PhyloXmlDataFormatException { if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) { throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" ); } @@ -231,7 +232,7 @@ public final class PhylogenyDecorator { } if ( !ForesterUtil.isEmpty( name ) ) { if ( intermediate_map != null ) { - name = PhylogenyDecorator.extractIntermediate( intermediate_map, name ); + name = PhylogenyDecorator.extractIntermediate( intermediate_map, name, verbose ); } if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) { String new_value = map.get( name ); @@ -248,20 +249,26 @@ public final class PhylogenyDecorator { new_value = extractBracketedScientificNames( node, new_value ); } else if ( extract_bracketed_tax_code ) { - if ( ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ).find() ) { + if ( ParserUtils.TAXOMONY_CODE_PATTERN_BRACKETED.matcher( new_value ).find() ) { new_value = extractBracketedTaxCodes( node, new_value ); } - else if ( ParserUtils.TAXOMONY_CODE_PATTERN_6.matcher( new_value ).find() ) { - new_value = extractBracketedTaxCodes6( node, new_value ); - } else if ( picky ) { throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value + "\"" ); } } switch ( field ) { + case MOL_SEQ: + if ( verbose ) { + System.out.println( name + ": " + new_value ); + } + if ( !node.getNodeData().isHasSequence() ) { + node.getNodeData().setSequence( new Sequence() ); + } + node.getNodeData().getSequence().setMolecularSequence( new_value ); + break; case SEQUENCE_ANNOTATION_DESC: - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.println( name + ": " + new_value ); } if ( !node.getNodeData().isHasSequence() ) { @@ -272,7 +279,7 @@ public final class PhylogenyDecorator { node.getNodeData().getSequence().addAnnotation( annotation ); break; case DOMAIN_STRUCTURE: - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.println( name + ": " + new_value ); } if ( !node.getNodeData().isHasSequence() ) { @@ -282,14 +289,14 @@ public final class PhylogenyDecorator { .setDomainArchitecture( new DomainArchitecture( new_value ) ); break; case TAXONOMY_CODE: - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.println( name + ": " + new_value ); } ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy().setTaxonomyCode( new_value ); break; case TAXONOMY_SCIENTIFIC_NAME: - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.println( name + ": " + new_value ); } ForesterUtil.ensurePresenceOfTaxonomy( node ); @@ -299,7 +306,7 @@ public final class PhylogenyDecorator { if ( trim_after_tilde ) { new_value = addTildeAnnotation( tilde_annotation, new_value ); } - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.println( name + ": " + new_value ); } if ( !node.getNodeData().isHasSequence() ) { @@ -308,23 +315,23 @@ public final class PhylogenyDecorator { node.getNodeData().getSequence().setName( new_value ); break; case NODE_NAME: - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.print( name + " -> " ); } if ( cut_name_after_space ) { - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.print( new_value + " -> " ); } new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value ); } else if ( process_name_intelligently ) { - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.print( new_value + " -> " ); } new_value = PhylogenyDecorator.processNameIntelligently( new_value ); } else if ( process_similar_to ) { - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.print( new_value + " -> " ); } new_value = PhylogenyDecorator.processSimilarTo( new_value ); @@ -335,7 +342,7 @@ public final class PhylogenyDecorator { if ( trim_after_tilde ) { new_value = addTildeAnnotation( tilde_annotation, new_value ); } - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.println( new_value ); } node.setName( new_value ); @@ -352,13 +359,6 @@ public final class PhylogenyDecorator { } } - private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) { - if ( ForesterUtil.isEmpty( tilde_annotation ) ) { - return new_value; - } - return new_value + tilde_annotation; - } - public static void decorate( final Phylogeny[] phylogenies, final Map> map, final boolean picky, @@ -380,7 +380,8 @@ public final class PhylogenyDecorator { final boolean process_name_intelligently, final boolean process_similar_to, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map, - final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException, + final boolean trim_after_tilde, + final boolean verbose ) throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException { for( final Phylogeny phylogenie : phylogenies ) { PhylogenyDecorator.decorate( phylogenie, @@ -393,7 +394,8 @@ public final class PhylogenyDecorator { process_name_intelligently, process_similar_to, numbers_of_chars_allowed_to_remove_if_not_found_in_map, - trim_after_tilde ); + trim_after_tilde, + verbose ); } } @@ -408,7 +410,8 @@ public final class PhylogenyDecorator { final boolean process_name_intelligently, final boolean process_similar_to, final int numbers_of_chars_allowed_to_remove_if_not_found_in_map, - final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException, + final boolean trim_after_tilde, + final boolean verbose ) throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException { for( final Phylogeny phylogenie : phylogenies ) { PhylogenyDecorator.decorate( phylogenie, @@ -422,7 +425,8 @@ public final class PhylogenyDecorator { process_name_intelligently, process_similar_to, numbers_of_chars_allowed_to_remove_if_not_found_in_map, - trim_after_tilde ); + trim_after_tilde, + verbose ); } } @@ -450,6 +454,13 @@ public final class PhylogenyDecorator { return map; } + private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) { + if ( ForesterUtil.isEmpty( tilde_annotation ) ) { + return new_value; + } + return new_value + tilde_annotation; + } + private static String deleteAtFirstSpace( final String name ) { final int first_space = name.indexOf( " " ); if ( first_space > 1 ) { @@ -467,48 +478,37 @@ public final class PhylogenyDecorator { } private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) { - final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ); - String tc = "?"; - if ( m.find() ) { - tc = m.group( 1 ); - } - ForesterUtil.ensurePresenceOfTaxonomy( node ); - try { - node.getNodeData().getTaxonomy().setTaxonomyCode( tc ); - } - catch ( final PhyloXmlDataFormatException e ) { - throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc ); + final StringBuilder sb = new StringBuilder(); + sb.append( new_value ); + final String tc = extractBracketedTaxCodes( sb ); + if ( !ForesterUtil.isEmpty( tc ) ) { + ForesterUtil.ensurePresenceOfTaxonomy( node ); + try { + node.getNodeData().getTaxonomy().setTaxonomyCode( tc ); + } + catch ( final PhyloXmlDataFormatException e ) { + throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc ); + } + return sb.toString().trim(); } - return new_value; //TODO //FIXME + return new_value; } - private static String extractBracketedTaxCodes6( final PhylogenyNode node, final String new_value ) { - final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_6.matcher( new_value ); - String tc = "?"; + private static String extractBracketedTaxCodes( final StringBuilder sb ) { + final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_BRACKETED.matcher( sb ); if ( m.find() ) { - tc = m.group( 1 ); - } - ForesterUtil.ensurePresenceOfTaxonomy( node ); - try { - if ( tc.length() == 6 ) { - final String t = tc.substring( 0, 5 ); - System.out.println( "WARNING: taxonomy code " + tc + " -> " + t ); - tc = t; - } - else { - throw new IllegalArgumentException(); - } - node.getNodeData().getTaxonomy().setTaxonomyCode( tc ); - } - catch ( final PhyloXmlDataFormatException e ) { - throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc ); + final String tc = m.group( 1 ); + sb.delete( m.start( 1 ) - 1, m.end( 1 ) + 1 ); + return tc; } - return new_value; //TODO //FIXME + return null; } - private static String extractIntermediate( final Map intermediate_map, final String name ) { + private static String extractIntermediate( final Map intermediate_map, + final String name, + final boolean verbose ) { String new_name = null; - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.print( name + " => " ); } if ( intermediate_map.containsKey( name ) ) { @@ -520,7 +520,7 @@ public final class PhylogenyDecorator { else { throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" ); } - if ( PhylogenyDecorator.VERBOSE ) { + if ( verbose ) { System.out.println( new_name + " " ); } return new_name; @@ -575,6 +575,12 @@ public final class PhylogenyDecorator { } public static enum FIELD { - NODE_NAME, SEQUENCE_ANNOTATION_DESC, DOMAIN_STRUCTURE, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, SEQUENCE_NAME; + DOMAIN_STRUCTURE, + MOL_SEQ, + NODE_NAME, + SEQUENCE_ANNOTATION_DESC, + SEQUENCE_NAME, + TAXONOMY_CODE, + TAXONOMY_SCIENTIFIC_NAME; } }