inprogress
[jalview.git] / forester / java / src / org / forester / tools / PhylogenyDecorator.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.tools;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.HashMap;
31 import java.util.Map;
32 import java.util.regex.Matcher;
33
34 import org.forester.io.parsers.nhx.NHXFormatException;
35 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
36 import org.forester.io.parsers.util.ParserUtils;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyNode;
39 import org.forester.phylogeny.data.Accession;
40 import org.forester.phylogeny.data.Annotation;
41 import org.forester.phylogeny.data.DomainArchitecture;
42 import org.forester.phylogeny.data.Identifier;
43 import org.forester.phylogeny.data.Sequence;
44 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
45 import org.forester.util.BasicTable;
46 import org.forester.util.BasicTableParser;
47 import org.forester.util.ForesterUtil;
48
49 public final class PhylogenyDecorator {
50
51     // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
52     final private static String TP_TAXONOMY_CODE        = "TAXONOMY_CODE";
53     final private static String TP_TAXONOMY_ID          = "TAXONOMY_ID";
54     final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
55     final private static String TP_TAXONOMY_SN          = "TAXONOMY_SN";
56     final private static String TP_TAXONOMY_CN          = "TAXONOMY_CN";
57     final private static String TP_TAXONOMY_SYN         = "TAXONOMY_SYN";
58     final private static String TP_SEQ_SYMBOL           = "SEQ_SYMBOL";
59     final private static String TP_SEQ_ACCESSION        = "SEQ_ACCESSION";
60     final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE";
61     final private static String TP_SEQ_ANNOTATION_DESC  = "SEQ_ANNOTATION_DESC";
62     final private static String TP_SEQ_ANNOTATION_REF   = "SEQ_ANNOTATION_REF";
63     final private static String TP_SEQ_MOL_SEQ          = "SEQ_MOL_SEQ";
64     final private static String TP_SEQ_NAME             = "SEQ_NAME";
65     final private static String TP_NODE_NAME            = "NODE_NAME";
66     public final static boolean SANITIZE                = false;
67     public final static boolean VERBOSE                 = true;
68
69     private PhylogenyDecorator() {
70         // Not needed.
71     }
72
73     public static void decorate( final Phylogeny phylogeny,
74                                  final Map<String, Map<String, String>> map,
75                                  final boolean picky,
76                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
77             throws IllegalArgumentException, PhyloXmlDataFormatException {
78         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
79             final PhylogenyNode node = iter.next();
80             final String name = node.getName();
81             if ( !ForesterUtil.isEmpty( name ) ) {
82                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
83                     Map<String, String> new_values = map.get( name );
84                     int x = 0;
85                     while ( ( new_values == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
86                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
87                         new_values = map.get( name.substring( 0, name.length() - x ) );
88                         ++x;
89                     }
90                     if ( new_values != null ) {
91                         if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
92                             ForesterUtil.ensurePresenceOfTaxonomy( node );
93                             node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
94                         }
95                         if ( new_values.containsKey( TP_TAXONOMY_ID )
96                                 && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
97                             ForesterUtil.ensurePresenceOfTaxonomy( node );
98                             node.getNodeData()
99                                     .getTaxonomy()
100                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
101                                                                     new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
102                         }
103                         else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
104                             ForesterUtil.ensurePresenceOfTaxonomy( node );
105                             node.getNodeData().getTaxonomy()
106                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
107                         }
108                         if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
109                             ForesterUtil.ensurePresenceOfTaxonomy( node );
110                             node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
111                         }
112                         if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
113                             ForesterUtil.ensurePresenceOfTaxonomy( node );
114                             node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
115                         }
116                         if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
117                             ForesterUtil.ensurePresenceOfTaxonomy( node );
118                             node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
119                         }
120                         if ( new_values.containsKey( TP_SEQ_ACCESSION )
121                                 && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
122                             ForesterUtil.ensurePresenceOfSequence( node );
123                             node.getNodeData()
124                                     .getSequence()
125                                     .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
126                                                                   new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
127                         }
128                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
129                             ForesterUtil.ensurePresenceOfSequence( node );
130                             final Annotation ann = new Annotation();
131                             ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
132                             node.getNodeData().getSequence().addAnnotation( ann );
133                         }
134                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
135                             ForesterUtil.ensurePresenceOfSequence( node );
136                             final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
137                             node.getNodeData().getSequence().addAnnotation( ann );
138                         }
139                         if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
140                             ForesterUtil.ensurePresenceOfSequence( node );
141                             node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
142                         }
143                         if ( new_values.containsKey( TP_SEQ_NAME ) ) {
144                             ForesterUtil.ensurePresenceOfSequence( node );
145                             node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
146                         }
147                         if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
148                             ForesterUtil.ensurePresenceOfSequence( node );
149                             node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
150                         }
151                         if ( new_values.containsKey( TP_NODE_NAME ) ) {
152                             node.setName( new_values.get( TP_NODE_NAME ) );
153                         }
154                     } // if ( new_values != null ) 
155                 } // if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) )
156                 else if ( picky ) {
157                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
158                 }
159             }
160         }
161     }
162
163     public static void decorate( final Phylogeny phylogeny,
164                                  final Map<String, String> map,
165                                  final FIELD field,
166                                  final boolean extract_bracketed_scientific_name,
167                                  final boolean extract_bracketed_tax_code,
168                                  final boolean picky,
169                                  final boolean cut_name_after_space,
170                                  final boolean process_name_intelligently,
171                                  final boolean process_similar_to,
172                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
173                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
174             PhyloXmlDataFormatException {
175         PhylogenyDecorator.decorate( phylogeny,
176                                      map,
177                                      field,
178                                      extract_bracketed_scientific_name,
179                                      extract_bracketed_tax_code,
180                                      picky,
181                                      null,
182                                      cut_name_after_space,
183                                      process_name_intelligently,
184                                      process_similar_to,
185                                      numbers_of_chars_allowed_to_remove_if_not_found_in_map,
186                                      trim_after_tilde );
187     }
188
189     /**
190      * 
191      * 
192      * 
193      * @param phylogeny
194      * @param map
195      *            maps names (in phylogeny) to new values if intermediate_map is
196      *            null otherwise maps intermediate value to new value
197      * @param field
198      * @param picky
199      * @param intermediate_map
200      *            maps name (in phylogeny) to a intermediate value
201      * @throws IllegalArgumentException
202      * @throws PhyloXmlDataFormatException 
203      */
204     public static void decorate( final Phylogeny phylogeny,
205                                  final Map<String, String> map,
206                                  final FIELD field,
207                                  final boolean extract_bracketed_scientific_name,
208                                  final boolean extract_bracketed_tax_code,
209                                  final boolean picky,
210                                  final Map<String, String> intermediate_map,
211                                  final boolean cut_name_after_space,
212                                  final boolean process_name_intelligently,
213                                  final boolean process_similar_to,
214                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
215                                  final boolean trim_after_tilde ) throws IllegalArgumentException,
216             PhyloXmlDataFormatException {
217         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
218             throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
219         }
220         if ( map.isEmpty() ) {
221             throw new IllegalArgumentException( "map is empty" );
222         }
223         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
224             final PhylogenyNode node = iter.next();
225             String name = node.getName();
226             String tilde_annotation = null;
227             if ( trim_after_tilde && ( name.indexOf( '~' ) > 0 ) ) {
228                 final int ti = name.indexOf( '~' );
229                 tilde_annotation = name.substring( ti );
230                 name = name.substring( 0, ti );
231             }
232             if ( !ForesterUtil.isEmpty( name ) ) {
233                 if ( intermediate_map != null ) {
234                     name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
235                 }
236                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
237                     String new_value = map.get( name );
238                     int x = 0;
239                     while ( ( new_value == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
240                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
241                         new_value = map.get( name.substring( 0, name.length() - x ) );
242                         ++x;
243                     }
244                     if ( new_value != null ) {
245                         new_value = new_value.trim();
246                         new_value.replaceAll( "/\\s+/", " " );
247                         if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
248                             new_value = extractBracketedScientificNames( node, new_value );
249                         }
250                         else if ( extract_bracketed_tax_code ) {
251                             if ( ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value ).find() ) {
252                                 new_value = extractBracketedTaxCodes( node, new_value );
253                             }
254                             else if ( ParserUtils.TAXOMONY_CODE_PATTERN_6.matcher( new_value ).find() ) {
255                                 new_value = extractBracketedTaxCodes6( node, new_value );
256                             }
257                             else if ( picky ) {
258                                 throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value
259                                         + "\"" );
260                             }
261                         }
262                         switch ( field ) {
263                             case SEQUENCE_ANNOTATION_DESC:
264                                 if ( PhylogenyDecorator.VERBOSE ) {
265                                     System.out.println( name + ": " + new_value );
266                                 }
267                                 if ( !node.getNodeData().isHasSequence() ) {
268                                     node.getNodeData().setSequence( new Sequence() );
269                                 }
270                                 final Annotation annotation = new Annotation();
271                                 annotation.setDesc( new_value );
272                                 node.getNodeData().getSequence().addAnnotation( annotation );
273                                 break;
274                             case DOMAIN_STRUCTURE:
275                                 if ( PhylogenyDecorator.VERBOSE ) {
276                                     System.out.println( name + ": " + new_value );
277                                 }
278                                 if ( !node.getNodeData().isHasSequence() ) {
279                                     node.getNodeData().setSequence( new Sequence() );
280                                 }
281                                 node.getNodeData().getSequence()
282                                         .setDomainArchitecture( new DomainArchitecture( new_value ) );
283                                 break;
284                             case TAXONOMY_CODE:
285                                 if ( PhylogenyDecorator.VERBOSE ) {
286                                     System.out.println( name + ": " + new_value );
287                                 }
288                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
289                                 node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
290                                 break;
291                             case TAXONOMY_SCIENTIFIC_NAME:
292                                 if ( PhylogenyDecorator.VERBOSE ) {
293                                     System.out.println( name + ": " + new_value );
294                                 }
295                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
296                                 node.getNodeData().getTaxonomy().setScientificName( new_value );
297                                 break;
298                             case SEQUENCE_NAME:
299                                 if ( trim_after_tilde ) {
300                                     new_value = addTildeAnnotation( tilde_annotation, new_value );
301                                 }
302                                 if ( PhylogenyDecorator.VERBOSE ) {
303                                     System.out.println( name + ": " + new_value );
304                                 }
305                                 if ( !node.getNodeData().isHasSequence() ) {
306                                     node.getNodeData().setSequence( new Sequence() );
307                                 }
308                                 node.getNodeData().getSequence().setName( new_value );
309                                 break;
310                             case NODE_NAME:
311                                 if ( PhylogenyDecorator.VERBOSE ) {
312                                     System.out.print( name + " -> " );
313                                 }
314                                 if ( cut_name_after_space ) {
315                                     if ( PhylogenyDecorator.VERBOSE ) {
316                                         System.out.print( new_value + " -> " );
317                                     }
318                                     new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
319                                 }
320                                 else if ( process_name_intelligently ) {
321                                     if ( PhylogenyDecorator.VERBOSE ) {
322                                         System.out.print( new_value + " -> " );
323                                     }
324                                     new_value = PhylogenyDecorator.processNameIntelligently( new_value );
325                                 }
326                                 else if ( process_similar_to ) {
327                                     if ( PhylogenyDecorator.VERBOSE ) {
328                                         System.out.print( new_value + " -> " );
329                                     }
330                                     new_value = PhylogenyDecorator.processSimilarTo( new_value );
331                                 }
332                                 if ( PhylogenyDecorator.SANITIZE ) {
333                                     new_value = PhylogenyDecorator.sanitize( new_value );
334                                 }
335                                 if ( trim_after_tilde ) {
336                                     new_value = addTildeAnnotation( tilde_annotation, new_value );
337                                 }
338                                 if ( PhylogenyDecorator.VERBOSE ) {
339                                     System.out.println( new_value );
340                                 }
341                                 node.setName( new_value );
342                                 break;
343                             default:
344                                 throw new RuntimeException( "unknown field \"" + field + "\"" );
345                         }
346                     }
347                 }
348                 else if ( picky ) {
349                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
350                 }
351             }
352         }
353     }
354
355     private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) {
356         if ( ForesterUtil.isEmpty( tilde_annotation ) ) {
357             return new_value;
358         }
359         return new_value + tilde_annotation;
360     }
361
362     public static void decorate( final Phylogeny[] phylogenies,
363                                  final Map<String, Map<String, String>> map,
364                                  final boolean picky,
365                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
366             throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
367         for( final Phylogeny phylogenie : phylogenies ) {
368             PhylogenyDecorator
369                     .decorate( phylogenie, map, picky, numbers_of_chars_allowed_to_remove_if_not_found_in_map );
370         }
371     }
372
373     public static void decorate( final Phylogeny[] phylogenies,
374                                  final Map<String, String> map,
375                                  final FIELD field,
376                                  final boolean extract_bracketed_scientific_name,
377                                  final boolean extract_bracketed_tax_code,
378                                  final boolean picky,
379                                  final boolean cut_name_after_space,
380                                  final boolean process_name_intelligently,
381                                  final boolean process_similar_to,
382                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
383                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
384             PhyloXmlDataFormatException {
385         for( final Phylogeny phylogenie : phylogenies ) {
386             PhylogenyDecorator.decorate( phylogenie,
387                                          map,
388                                          field,
389                                          extract_bracketed_scientific_name,
390                                          extract_bracketed_tax_code,
391                                          picky,
392                                          cut_name_after_space,
393                                          process_name_intelligently,
394                                          process_similar_to,
395                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
396                                          trim_after_tilde );
397         }
398     }
399
400     public static void decorate( final Phylogeny[] phylogenies,
401                                  final Map<String, String> map,
402                                  final FIELD field,
403                                  final boolean extract_bracketed_scientific_name,
404                                  final boolean extract_bracketed_tax_code,
405                                  final boolean picky,
406                                  final Map<String, String> intermediate_map,
407                                  final boolean cut_name_after_space,
408                                  final boolean process_name_intelligently,
409                                  final boolean process_similar_to,
410                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
411                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
412             PhyloXmlDataFormatException {
413         for( final Phylogeny phylogenie : phylogenies ) {
414             PhylogenyDecorator.decorate( phylogenie,
415                                          map,
416                                          field,
417                                          extract_bracketed_scientific_name,
418                                          extract_bracketed_tax_code,
419                                          picky,
420                                          intermediate_map,
421                                          cut_name_after_space,
422                                          process_name_intelligently,
423                                          process_similar_to,
424                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
425                                          trim_after_tilde );
426         }
427     }
428
429     public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
430             throws IOException {
431         final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
432         BasicTable<String> mapping_table = null;
433         mapping_table = BasicTableParser.parse( mapping_table_file, '\t', false, false );
434         for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
435             final Map<String, String> row_map = new HashMap<String, String>();
436             String name = null;
437             for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
438                 final String table_cell = mapping_table.getValue( col, row );
439                 if ( col == 0 ) {
440                     name = table_cell;
441                 }
442                 else if ( table_cell != null ) {
443                     final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
444                     final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
445                     row_map.put( key, val );
446                 }
447             }
448             map.put( name, row_map );
449         }
450         return map;
451     }
452
453     private static String deleteAtFirstSpace( final String name ) {
454         final int first_space = name.indexOf( " " );
455         if ( first_space > 1 ) {
456             return name.substring( 0, first_space ).trim();
457         }
458         return name;
459     }
460
461     private static String extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
462         final int i = new_value.lastIndexOf( "[" );
463         final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
464         ForesterUtil.ensurePresenceOfTaxonomy( node );
465         node.getNodeData().getTaxonomy().setScientificName( scientific_name );
466         return new_value.substring( 0, i - 1 ).trim();
467     }
468
469     private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
470         final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_4.matcher( new_value );
471         String tc = "?";
472         if ( m.find() ) {
473             tc = m.group( 1 );
474         }
475         ForesterUtil.ensurePresenceOfTaxonomy( node );
476         try {
477             node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
478         }
479         catch ( final PhyloXmlDataFormatException e ) {
480             throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
481         }
482         return new_value; //TODO //FIXME
483     }
484
485     private static String extractBracketedTaxCodes6( final PhylogenyNode node, final String new_value ) {
486         final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_6.matcher( new_value );
487         String tc = "?";
488         if ( m.find() ) {
489             tc = m.group( 1 );
490         }
491         ForesterUtil.ensurePresenceOfTaxonomy( node );
492         try {
493             if ( tc.length() == 6 ) {
494                 final String t = tc.substring( 0, 5 );
495                 System.out.println( "WARNING: taxonomy code " + tc + " -> " + t );
496                 tc = t;
497             }
498             else {
499                 throw new IllegalArgumentException();
500             }
501             node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
502         }
503         catch ( final PhyloXmlDataFormatException e ) {
504             throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
505         }
506         return new_value; //TODO //FIXME
507     }
508
509     private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
510         String new_name = null;
511         if ( PhylogenyDecorator.VERBOSE ) {
512             System.out.print( name + " => " );
513         }
514         if ( intermediate_map.containsKey( name ) ) {
515             new_name = intermediate_map.get( name );
516             if ( ForesterUtil.isEmpty( new_name ) ) {
517                 throw new IllegalArgumentException( "\"" + name + "\" maps to null or empty string in secondary map" );
518             }
519         }
520         else {
521             throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" );
522         }
523         if ( PhylogenyDecorator.VERBOSE ) {
524             System.out.println( new_name + "  " );
525         }
526         return new_name;
527     }
528
529     private static String processNameIntelligently( final String name ) {
530         final String[] s = name.split( " " );
531         if ( s.length < 2 ) {
532             return name;
533         }
534         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "|" ) > 0 ) ) {
535             return s[ 0 ];
536         }
537         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "|" ) > 0 ) ) {
538             return s[ 1 ];
539         }
540         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "." ) > 0 ) ) {
541             return s[ 0 ];
542         }
543         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "." ) > 0 ) ) {
544             return s[ 1 ];
545         }
546         else if ( s[ 0 ].indexOf( "_" ) > 0 ) {
547             return s[ 0 ];
548         }
549         else if ( s[ 1 ].indexOf( "_" ) > 0 ) {
550             return s[ 1 ];
551         }
552         else {
553             return s[ 0 ];
554         }
555     }
556
557     private static String processSimilarTo( final String name ) {
558         final int i = name.toLowerCase().indexOf( "similar to" );
559         String similar_to = "";
560         if ( i >= 0 ) {
561             similar_to = " similarity=" + name.substring( i + 10 ).trim();
562         }
563         final String pi = processNameIntelligently( name );
564         return pi + similar_to;
565     }
566
567     private static String sanitize( String s ) {
568         s = s.replace( ' ', '_' );
569         s = s.replace( '(', '{' );
570         s = s.replace( ')', '}' );
571         s = s.replace( '[', '{' );
572         s = s.replace( ']', '}' );
573         s = s.replace( ',', '_' );
574         return s;
575     }
576
577     public static enum FIELD {
578         NODE_NAME, SEQUENCE_ANNOTATION_DESC, DOMAIN_STRUCTURE, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, SEQUENCE_NAME;
579     }
580 }