final static private String MAPPING_FILE_SEPARATOR_OPTION = "s";
final static private String MAPPING_FILE_SEPARATOR_DEFAULT = ": ";
final static private String PRG_NAME = "decorator";
- final static private String PRG_VERSION = "1.11";
- final static private String PRG_DATE = "2012.09.15";
-
- private static void argumentsError() {
- System.out.println();
- System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
- + "[mapping table file] <phylogenies outfile>" );
- System.out.println();
- System.out.println( "options:" );
- System.out.println();
- System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
- System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
- System.out.println( " in phylogenies infile if not found (in map) otherwise" );
- System.out.println( " -p : picky, fails if node name not found in mapping table" );
- System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
- System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
- System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
- System.out.println();
- System.out.println();
- System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
- System.out.println();
- System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
- System.out.println( " " + SEQUENCE_ANNOTATION_DESC
- + " : sequence annotation description" );
- System.out.println( " " + DS_FILED + " : domain structure" );
- System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
- System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD
- + ": taxonomy scientific name" );
- System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" );
- System.out.println( " -k=<n> : key column in mapping table (0 based)," );
- System.out.println( " names of the node to be decorated - default is 0" );
- System.out.println( " -v=<n> : value column in mapping table (0 based)," );
- System.out.println( " data which with to decorate - default is 1" );
- System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
- + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" );
- System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION
- + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" );
- System.out.println( " -s=<c> : column separator in mapping file, default is \""
- + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
- System.out.println( " -x : process name \"intelligently\" (only for -f=n)" );
- System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
- + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
- System.out.println( " -c : cut name after first space (only for -f=n)" );
- System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
- + " : trim node name to be replaced after tilde" );
- System.out.println();
- System.exit( -1 );
- }
+ final static private String PRG_VERSION = "1.12";
+ final static private String PRG_DATE = "2012.12.21";
public static void main( final String args[] ) {
ForesterUtil.printProgramInformation( decorator.PRG_NAME, decorator.PRG_VERSION, decorator.PRG_DATE );
allowed_options.add( decorator.VALUE_COLUMN );
allowed_options.add( decorator.MAPPING_FILE_SEPARATOR_OPTION );
allowed_options.add( decorator.EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION );
+ allowed_options.add( decorator.EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION );
allowed_options.add( decorator.TREE_NAME_OPTION );
allowed_options.add( decorator.TREE_ID_OPTION );
allowed_options.add( decorator.TREE_DESC_OPTION );
ForesterUtil.programMessage( PRG_NAME, "wrote: " + phylogenies_outfile );
ForesterUtil.programMessage( PRG_NAME, "OK." );
}
+
+ /**
+  * Prints the command line usage summary (general options followed by the
+  * advanced options) to standard output and then terminates the JVM with
+  * exit status -1. This method therefore never returns normally.
+  */
+ private static void argumentsError() {
+     System.out.println();
+     System.out.println( decorator.PRG_NAME + " -" + ADVANCED_TABLE_OPTION + " | -f=<c> <phylogenies infile> "
+             + "[mapping table file] <phylogenies outfile>" );
+     System.out.println();
+     System.out.println( "options:" );
+     System.out.println();
+     System.out.println( " -" + ADVANCED_TABLE_OPTION + " : table instead of one to one map (-f=<c>)" );
+     System.out.println( " -r=<n> : allow to remove up to n characters from the end of the names" );
+     System.out.println( " in phylogenies infile if not found (in map) otherwise" );
+     System.out.println( " -p : picky, fails if node name not found in mapping table" );
+     System.out.println( " -" + TREE_NAME_OPTION + "=<s>: name for the phylogeny" );
+     System.out.println( " -" + TREE_ID_OPTION + "=<s>: identifier for the phylogeny (in the form provider:value)" );
+     System.out.println( " -" + TREE_DESC_OPTION + "=<s>: description for phylogenies" );
+     System.out.println();
+     System.out.println();
+     System.out.println( "advanced options, only available if -" + ADVANCED_TABLE_OPTION + " is not used:" );
+     System.out.println();
+     System.out.println( " -f=<c> : field to be replaced: " + NODE_NAME_FIELD + " : node name" );
+     System.out.println( " " + SEQUENCE_ANNOTATION_DESC
+             + " : sequence annotation description" );
+     System.out.println( " " + DS_FILED + " : domain structure" );
+     System.out.println( " " + TAXONOMY_CODE_FIELD + " : taxonomy code" );
+     System.out.println( " " + TAXONOMY_SCIENTIFIC_NAME_FIELD
+             + ": taxonomy scientific name" );
+     System.out.println( " " + SEQUENCE_NAME_FIELD + " : sequence name" );
+     System.out.println( " -k=<n> : key column in mapping table (0 based)," );
+     System.out.println( " names of the node to be decorated - default is 0" );
+     System.out.println( " -v=<n> : value column in mapping table (0 based)," );
+     // Wording fixed: the help text previously read "data which with to decorate".
+     System.out.println( " data with which to decorate - default is 1" );
+     System.out.println( " -" + EXTRACT_BRACKETED_SCIENTIC_NAME_OPTION
+             + " : to extract bracketed scientific names, e.g. [Nematostella vectensis]" );
+     System.out.println( " -" + EXTRACT_BRACKETED_TAXONOMIC_CODE_OPTION
+             + " : to extract bracketed taxonomic codes, e.g. [NEMVE]" );
+     System.out.println( " -s=<c> : column separator in mapping file, default is \""
+             + decorator.MAPPING_FILE_SEPARATOR_DEFAULT + "\"" );
+     System.out.println( " -x : process name \"intelligently\" (only for -f=n)" );
+     System.out.println( " -" + decorator.PROCESS_SIMILAR_TO_OPTION
+             + " : process name \"intelligently\" and process information after \"similar to\" (only for -f=n)" );
+     System.out.println( " -c : cut name after first space (only for -f=n)" );
+     System.out.println( " -" + decorator.TRIM_AFTER_TILDE_OPTION
+             + " : trim node name to be replaced after tilde" );
+     System.out.println();
+     // Usage errors are fatal for this command line tool.
+     System.exit( -1 );
+ }
}
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
-import java.util.regex.Pattern;
import org.forester.io.parsers.nhx.NHXFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
public final class PhylogenyDecorator {
// From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
- final private static String TP_TAXONOMY_CODE = "TAXONOMY_CODE";
- final private static String TP_TAXONOMY_ID = "TAXONOMY_ID";
- final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
- final private static String TP_TAXONOMY_SN = "TAXONOMY_SN";
- final private static String TP_TAXONOMY_CN = "TAXONOMY_CN";
- final private static String TP_TAXONOMY_SYN = "TAXONOMY_SYN";
- final private static String TP_SEQ_SYMBOL = "SEQ_SYMBOL";
- final private static String TP_SEQ_ACCESSION = "SEQ_ACCESSION";
- final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE";
- final private static String TP_SEQ_ANNOTATION_DESC = "SEQ_ANNOTATION_DESC";
- final private static String TP_SEQ_ANNOTATION_REF = "SEQ_ANNOTATION_REF";
- final private static String TP_SEQ_MOL_SEQ = "SEQ_MOL_SEQ";
- final private static String TP_SEQ_NAME = "SEQ_NAME";
- final private static String TP_NODE_NAME = "NODE_NAME";
- final private static Pattern NODENAME_SEQNUMBER_TAXDOMAINNUMBER = Pattern
- .compile( "^([a-fA-Z0-9]{1,5})_([A-Z0-9]{2,4}[A-Z])(\\d{1,4})$" );
- public final static boolean SANITIZE = false;
- public final static boolean VERBOSE = true;
- private static final boolean CUT = true;
+ final private static String TP_TAXONOMY_CODE = "TAXONOMY_CODE";
+ final private static String TP_TAXONOMY_ID = "TAXONOMY_ID";
+ final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
+ final private static String TP_TAXONOMY_SN = "TAXONOMY_SN";
+ final private static String TP_TAXONOMY_CN = "TAXONOMY_CN";
+ final private static String TP_TAXONOMY_SYN = "TAXONOMY_SYN";
+ final private static String TP_SEQ_SYMBOL = "SEQ_SYMBOL";
+ final private static String TP_SEQ_ACCESSION = "SEQ_ACCESSION";
+ final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE";
+ final private static String TP_SEQ_ANNOTATION_DESC = "SEQ_ANNOTATION_DESC";
+ final private static String TP_SEQ_ANNOTATION_REF = "SEQ_ANNOTATION_REF";
+ final private static String TP_SEQ_MOL_SEQ = "SEQ_MOL_SEQ";
+ final private static String TP_SEQ_NAME = "SEQ_NAME";
+ final private static String TP_NODE_NAME = "NODE_NAME";
+ public final static boolean SANITIZE = false;
+ public final static boolean VERBOSE = true;
private PhylogenyDecorator() {
// Not needed.
}
}
- /**
- *
- *
- *
- *
- *
- * @param phylogeny
- * @param map
- * maps names (in phylogeny) to new values
- * @param field
- * @param picky
- * @throws IllegalArgumentException
- * @throws NHXFormatException
- * @throws PhyloXmlDataFormatException
- */
public static void decorate( final Phylogeny phylogeny,
final Map<String, String> map,
final FIELD field,
if ( intermediate_map != null ) {
name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
}
- // int space_index = name.indexOf( " " );
- // if ( CUT && space_index > 0 ) {
- // int y = name.lastIndexOf( "|" );
- // name = name.substring( y + 1, space_index );
- // }
- // String new_value = null;
- // for( String key : map.keySet() ) {
- // if ( key.indexOf( name ) >= 0 ) {
- // if ( new_value == null ) {
- // new_value = map.get( key );
- // }
- // else {
- // System.out.println( name + " is not unique" );
- // System.exit( -1 );
- // }
- // }
- // }
- // if ( new_value != null ) {
if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
String new_value = map.get( name );
int x = 0;
}
}
+ /**
+  * Reads a mapping table file (parsed by {@code BasicTableParser} with
+  * {@code "\t"} passed as the separator) and converts it into a map keyed
+  * by the value found in the first column of each row.
+  * <p>
+  * Every further non-null cell of a row must have the form
+  * {@code key:value}; it is split at its first colon and the pair is stored
+  * in that row's inner map. If several rows share the same first-column
+  * value, the row processed last replaces the earlier ones.
+  *
+  * @param mapping_table_file the mapping table file to parse
+  * @return a map from first-column value to the key/value pairs of that row
+  * @throws IOException if the underlying parser propagates one, or if a cell
+  *             beyond the first column does not contain a colon
+  */
+ public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
+         throws IOException {
+     final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
+     final BasicTable<String> mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false );
+     for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
+         final Map<String, String> row_map = new HashMap<String, String>();
+         String name = null;
+         for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
+             final String table_cell = mapping_table.getValue( col, row );
+             if ( col == 0 ) {
+                 // The first column carries the name under which the row is stored.
+                 name = table_cell;
+             }
+             else if ( table_cell != null ) {
+                 final int colon = table_cell.indexOf( ':' );
+                 if ( colon < 0 ) {
+                     // Without this check substring( 0, -1 ) would fail with an
+                     // uninformative StringIndexOutOfBoundsException.
+                     throw new IOException( "illegal format in mapping table [" + mapping_table_file
+                             + "]: expected \"key:value\" but found \"" + table_cell + "\" (row " + row
+                             + ", column " + col + ")" );
+                 }
+                 row_map.put( table_cell.substring( 0, colon ), table_cell.substring( colon + 1 ) );
+             }
+         }
+         map.put( name, row_map );
+     }
+     return map;
+ }
+
private static String deleteAtFirstSpace( final String name ) {
final int first_space = name.indexOf( " " );
if ( first_space > 1 ) {
return new_name;
}
- public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
- throws IOException {
- final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
- BasicTable<String> mapping_table = null;
- mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false );
- for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
- final Map<String, String> row_map = new HashMap<String, String>();
- String name = null;
- for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
- final String table_cell = mapping_table.getValue( col, row );
- if ( col == 0 ) {
- name = table_cell;
- }
- else if ( table_cell != null ) {
- final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
- final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
- row_map.put( key, val );
- }
- }
- map.put( name, row_map );
- }
- return map;
- }
-
private static String processNameIntelligently( final String name ) {
final String[] s = name.split( " " );
if ( s.length < 2 ) {