90959dc836e4a419fccdf7b022c2024761c3fb2e
[jalview.git] / forester / java / src / org / forester / tools / PhylogenyDecorator.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.tools;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.HashMap;
31 import java.util.Map;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
34
35 import org.forester.archaeopteryx.AptxUtil;
36 import org.forester.io.parsers.nhx.NHXFormatException;
37 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
38 import org.forester.phylogeny.Phylogeny;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.data.Accession;
41 import org.forester.phylogeny.data.Annotation;
42 import org.forester.phylogeny.data.DomainArchitecture;
43 import org.forester.phylogeny.data.Identifier;
44 import org.forester.phylogeny.data.Sequence;
45 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
46 import org.forester.util.BasicTable;
47 import org.forester.util.BasicTableParser;
48 import org.forester.util.ForesterUtil;
49
50 public final class PhylogenyDecorator {
51
52     // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
53     final private static String  TP_TAXONOMY_CODE                   = "TAXONOMY_CODE";
54     final private static String  TP_TAXONOMY_ID                     = "TAXONOMY_ID";
55     final private static String  TP_TAXONOMY_ID_PROVIDER            = "TAXONOMY_ID_PROVIDER";
56     final private static String  TP_TAXONOMY_SN                     = "TAXONOMY_SN";
57     final private static String  TP_TAXONOMY_CN                     = "TAXONOMY_CN";
58     final private static String  TP_TAXONOMY_SYN                    = "TAXONOMY_SYN";
59     final private static String  TP_SEQ_SYMBOL                      = "SEQ_SYMBOL";
60     final private static String  TP_SEQ_ACCESSION                   = "SEQ_ACCESSION";
61     final private static String  TP_SEQ_ACCESSION_SOURCE            = "SEQ_ACCESSION_SOURCE";
62     final private static String  TP_SEQ_ANNOTATION_DESC             = "SEQ_ANNOTATION_DESC";
63     final private static String  TP_SEQ_ANNOTATION_REF              = "SEQ_ANNOTATION_REF";
64     final private static String  TP_SEQ_MOL_SEQ                     = "SEQ_MOL_SEQ";
65     final private static String  TP_SEQ_NAME                        = "SEQ_NAME";
66     final private static String  TP_NODE_NAME                       = "NODE_NAME";
67     final private static Pattern NODENAME_SEQNUMBER_TAXDOMAINNUMBER = Pattern
68                                                                             .compile( "^([a-fA-Z0-9]{1,5})_([A-Z0-9]{2,4}[A-Z])(\\d{1,4})$" );
69     public final static boolean  SANITIZE                           = false;
70     public final static boolean  VERBOSE                            = true;
71
72     private PhylogenyDecorator() {
73         // Not needed.
74     }
75
76     public static void decorate( final Phylogeny phylogeny,
77                                  final Map<String, Map<String, String>> map,
78                                  final boolean picky,
79                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
80             throws IllegalArgumentException, PhyloXmlDataFormatException {
81         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
82             final PhylogenyNode node = iter.next();
83             final String name = node.getName();
84             if ( !ForesterUtil.isEmpty( name ) ) {
85                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
86                     Map<String, String> new_values = map.get( name );
87                     int x = 0;
88                     while ( ( new_values == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
89                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
90                         new_values = map.get( name.substring( 0, name.length() - x ) );
91                         ++x;
92                     }
93                     if ( new_values != null ) {
94                         if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
95                             AptxUtil.ensurePresenceOfTaxonomy( node );
96                             node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
97                         }
98                         if ( new_values.containsKey( TP_TAXONOMY_ID )
99                                 && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
100                             AptxUtil.ensurePresenceOfTaxonomy( node );
101                             node.getNodeData()
102                                     .getTaxonomy()
103                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
104                                                                     new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
105                         }
106                         else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
107                             AptxUtil.ensurePresenceOfTaxonomy( node );
108                             node.getNodeData().getTaxonomy()
109                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
110                         }
111                         if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
112                             AptxUtil.ensurePresenceOfTaxonomy( node );
113                             node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
114                         }
115                         if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
116                             AptxUtil.ensurePresenceOfTaxonomy( node );
117                             node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
118                         }
119                         if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
120                             AptxUtil.ensurePresenceOfTaxonomy( node );
121                             node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
122                         }
123                         if ( new_values.containsKey( TP_SEQ_ACCESSION )
124                                 && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
125                             AptxUtil.ensurePresenceOfSequence( node );
126                             node.getNodeData()
127                                     .getSequence()
128                                     .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
129                                                                   new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
130                         }
131                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
132                             AptxUtil.ensurePresenceOfSequence( node );
133                             final Annotation ann = new Annotation( "?" );
134                             ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
135                             node.getNodeData().getSequence().addAnnotation( ann );
136                         }
137                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
138                             AptxUtil.ensurePresenceOfSequence( node );
139                             final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
140                             node.getNodeData().getSequence().addAnnotation( ann );
141                         }
142                         if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
143                             AptxUtil.ensurePresenceOfSequence( node );
144                             node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
145                         }
146                         if ( new_values.containsKey( TP_SEQ_NAME ) ) {
147                             AptxUtil.ensurePresenceOfSequence( node );
148                             node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
149                         }
150                         if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
151                             AptxUtil.ensurePresenceOfSequence( node );
152                             node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
153                         }
154                         if ( new_values.containsKey( TP_NODE_NAME ) ) {
155                             node.setName( new_values.get( TP_NODE_NAME ) );
156                         }
157                     } // if ( new_values != null ) 
158                 } // if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) )
159                 else if ( picky ) {
160                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
161                 }
162             }
163         }
164     }
165
166     /**
167      * 
168      * 
169      * 
170      * 
171      * 
172      * @param phylogeny
173      * @param map
174      *            maps names (in phylogeny) to new values
175      * @param field
176      * @param picky
177      * @throws IllegalArgumentException
178      * @throws NHXFormatException
179      * @throws PhyloXmlDataFormatException 
180      */
181     public static void decorate( final Phylogeny phylogeny,
182                                  final Map<String, String> map,
183                                  final FIELD field,
184                                  final boolean extract_bracketed_scientific_name,
185                                  final boolean picky,
186                                  final boolean cut_name_after_space,
187                                  final boolean process_name_intelligently,
188                                  final boolean process_similar_to,
189                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
190                                  final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
191             NHXFormatException, PhyloXmlDataFormatException {
192         PhylogenyDecorator.decorate( phylogeny,
193                                      map,
194                                      field,
195                                      extract_bracketed_scientific_name,
196                                      picky,
197                                      null,
198                                      cut_name_after_space,
199                                      process_name_intelligently,
200                                      process_similar_to,
201                                      numbers_of_chars_allowed_to_remove_if_not_found_in_map,
202                                      move_domain_numbers_at_end_to_middle );
203     }
204
205     /**
206      * 
207      * 
208      * 
209      * @param phylogeny
210      * @param map
211      *            maps names (in phylogeny) to new values if intermediate_map is
212      *            null otherwise maps intermediate value to new value
213      * @param field
214      * @param picky
215      * @param intermediate_map
216      *            maps name (in phylogeny) to a intermediate value
217      * @throws IllegalArgumentException
218      * @throws PhyloXmlDataFormatException 
219      */
220     public static void decorate( final Phylogeny phylogeny,
221                                  final Map<String, String> map,
222                                  final FIELD field,
223                                  final boolean extract_bracketed_scientific_name,
224                                  final boolean picky,
225                                  final Map<String, String> intermediate_map,
226                                  final boolean cut_name_after_space,
227                                  final boolean process_name_intelligently,
228                                  final boolean process_similar_to,
229                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
230                                  final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
231             PhyloXmlDataFormatException {
232         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
233             throw new IllegalArgumentException( "Attempt to extract bracketed scientific name together with data field pointing to scientific name" );
234         }
235         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
236             final PhylogenyNode node = iter.next();
237             String name = node.getName();
238             if ( !ForesterUtil.isEmpty( name ) ) {
239                 if ( intermediate_map != null ) {
240                     name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
241                 }
242                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
243                     String new_value = map.get( name );
244                     int x = 0;
245                     while ( ( new_value == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
246                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
247                         new_value = map.get( name.substring( 0, name.length() - x ) );
248                         ++x;
249                     }
250                     if ( new_value != null ) {
251                         new_value = new_value.trim();
252                         new_value.replaceAll( "/\\s+/", " " );
253                         if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
254                             extractBracketedScientificNames( node, new_value );
255                         }
256                         switch ( field ) {
257                             case SEQUENCE_ANNOTATION_DESC:
258                                 if ( PhylogenyDecorator.VERBOSE ) {
259                                     System.out.println( name + ": " + new_value );
260                                 }
261                                 if ( !node.getNodeData().isHasSequence() ) {
262                                     node.getNodeData().setSequence( new Sequence() );
263                                 }
264                                 final Annotation annotation = new Annotation( "?" );
265                                 annotation.setDesc( new_value );
266                                 node.getNodeData().getSequence().addAnnotation( annotation );
267                                 break;
268                             case DOMAIN_STRUCTURE:
269                                 if ( PhylogenyDecorator.VERBOSE ) {
270                                     System.out.println( name + ": " + new_value );
271                                 }
272                                 if ( !node.getNodeData().isHasSequence() ) {
273                                     node.getNodeData().setSequence( new Sequence() );
274                                 }
275                                 node.getNodeData().getSequence()
276                                         .setDomainArchitecture( new DomainArchitecture( new_value ) );
277                                 break;
278                             case TAXONOMY_CODE:
279                                 if ( PhylogenyDecorator.VERBOSE ) {
280                                     System.out.println( name + ": " + new_value );
281                                 }
282                                 AptxUtil.ensurePresenceOfTaxonomy( node );
283                                 node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
284                                 break;
285                             case TAXONOMY_SCIENTIFIC_NAME:
286                                 if ( PhylogenyDecorator.VERBOSE ) {
287                                     System.out.println( name + ": " + new_value );
288                                 }
289                                 AptxUtil.ensurePresenceOfTaxonomy( node );
290                                 node.getNodeData().getTaxonomy().setScientificName( new_value );
291                                 break;
292                             case SEQUENCE_NAME:
293                                 if ( PhylogenyDecorator.VERBOSE ) {
294                                     System.out.println( name + ": " + new_value );
295                                 }
296                                 if ( !node.getNodeData().isHasSequence() ) {
297                                     node.getNodeData().setSequence( new Sequence() );
298                                 }
299                                 node.getNodeData().getSequence().setName( new_value );
300                                 break;
301                             case NODE_NAME:
302                                 if ( PhylogenyDecorator.VERBOSE ) {
303                                     System.out.print( name + " -> " );
304                                 }
305                                 if ( cut_name_after_space ) {
306                                     if ( PhylogenyDecorator.VERBOSE ) {
307                                         System.out.print( new_value + " -> " );
308                                     }
309                                     new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
310                                 }
311                                 else if ( process_name_intelligently ) {
312                                     if ( PhylogenyDecorator.VERBOSE ) {
313                                         System.out.print( new_value + " -> " );
314                                     }
315                                     new_value = PhylogenyDecorator.processNameIntelligently( new_value );
316                                 }
317                                 else if ( process_similar_to ) {
318                                     if ( PhylogenyDecorator.VERBOSE ) {
319                                         System.out.print( new_value + " -> " );
320                                     }
321                                     new_value = PhylogenyDecorator.processSimilarTo( new_value );
322                                 }
323                                 if ( PhylogenyDecorator.SANITIZE ) {
324                                     new_value = PhylogenyDecorator.sanitize( new_value );
325                                 }
326                                 if ( PhylogenyDecorator.VERBOSE ) {
327                                     System.out.println( new_value );
328                                 }
329                                 node.setName( new_value );
330                                 break;
331                             default:
332                                 throw new RuntimeException( "unknown field \"" + field + "\"" );
333                         }
334                         if ( move_domain_numbers_at_end_to_middle && ( field != FIELD.NODE_NAME ) ) {
335                             node.setName( moveDomainNumbersAtEnd( node.getName() ) );
336                         }
337                     }
338                 }
339                 else if ( picky ) {
340                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
341                 }
342             }
343         }
344     }
345
346     public static void decorate( final Phylogeny[] phylogenies,
347                                  final Map<String, Map<String, String>> map,
348                                  final boolean picky,
349                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
350             throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
351         for( int i = 0; i < phylogenies.length; ++i ) {
352             PhylogenyDecorator.decorate( phylogenies[ i ],
353                                          map,
354                                          picky,
355                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map );
356         }
357     }
358
359     public static void decorate( final Phylogeny[] phylogenies,
360                                  final Map<String, String> map,
361                                  final FIELD field,
362                                  final boolean extract_bracketed_scientific_name,
363                                  final boolean picky,
364                                  final boolean cut_name_after_space,
365                                  final boolean process_name_intelligently,
366                                  final boolean process_similar_to,
367                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
368                                  final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
369             NHXFormatException, PhyloXmlDataFormatException {
370         for( int i = 0; i < phylogenies.length; ++i ) {
371             PhylogenyDecorator.decorate( phylogenies[ i ],
372                                          map,
373                                          field,
374                                          extract_bracketed_scientific_name,
375                                          picky,
376                                          cut_name_after_space,
377                                          process_name_intelligently,
378                                          process_similar_to,
379                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
380                                          move_domain_numbers_at_end_to_middle );
381         }
382     }
383
384     public static void decorate( final Phylogeny[] phylogenies,
385                                  final Map<String, String> map,
386                                  final FIELD field,
387                                  final boolean extract_bracketed_scientific_name,
388                                  final boolean picky,
389                                  final Map<String, String> intermediate_map,
390                                  final boolean cut_name_after_space,
391                                  final boolean process_name_intelligently,
392                                  final boolean process_similar_to,
393                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
394                                  final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
395             NHXFormatException, PhyloXmlDataFormatException {
396         for( int i = 0; i < phylogenies.length; ++i ) {
397             PhylogenyDecorator.decorate( phylogenies[ i ],
398                                          map,
399                                          field,
400                                          extract_bracketed_scientific_name,
401                                          picky,
402                                          intermediate_map,
403                                          cut_name_after_space,
404                                          process_name_intelligently,
405                                          process_similar_to,
406                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
407                                          move_domain_numbers_at_end_to_middle );
408         }
409     }
410
411     private static String deleteAtFirstSpace( final String name ) {
412         final int first_space = name.indexOf( " " );
413         if ( first_space > 1 ) {
414             return name.substring( 0, first_space ).trim();
415         }
416         return name;
417     }
418
419     private static void extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
420         final int i = new_value.lastIndexOf( "[" );
421         final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
422         AptxUtil.ensurePresenceOfTaxonomy( node );
423         node.getNodeData().getTaxonomy().setScientificName( scientific_name );
424     }
425
426     private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
427         String new_name = null;
428         if ( PhylogenyDecorator.VERBOSE ) {
429             System.out.print( name + " => " );
430         }
431         if ( intermediate_map.containsKey( name ) ) {
432             new_name = intermediate_map.get( name );
433             if ( ForesterUtil.isEmpty( new_name ) ) {
434                 throw new IllegalArgumentException( "\"" + name + "\" maps to null or empty string in secondary map" );
435             }
436         }
437         else {
438             throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" );
439         }
440         if ( PhylogenyDecorator.VERBOSE ) {
441             System.out.println( new_name + "  " );
442         }
443         return new_name;
444     }
445
446     private static String moveDomainNumbersAtEnd( final String node_name ) {
447         final Matcher m = NODENAME_SEQNUMBER_TAXDOMAINNUMBER.matcher( node_name );
448         if ( m.matches() ) {
449             final String seq_number = m.group( 1 );
450             final String tax = m.group( 2 );
451             final String domain_number = m.group( 3 );
452             return seq_number + "_[" + domain_number + "]_" + tax;
453         }
454         else {
455             return node_name;
456         }
457     }
458
459     public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
460             throws IOException {
461         final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
462         BasicTable<String> mapping_table = null;
463         mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false );
464         for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
465             final Map<String, String> row_map = new HashMap<String, String>();
466             String name = null;
467             for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
468                 final String table_cell = mapping_table.getValue( col, row );
469                 if ( col == 0 ) {
470                     name = table_cell;
471                 }
472                 else if ( table_cell != null ) {
473                     final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
474                     final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
475                     row_map.put( key, val );
476                 }
477             }
478             map.put( name, row_map );
479         }
480         return map;
481     }
482
483     private static String processNameIntelligently( final String name ) {
484         final String[] s = name.split( " " );
485         if ( s.length < 2 ) {
486             return name;
487         }
488         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "|" ) > 0 ) ) {
489             return s[ 0 ];
490         }
491         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "|" ) > 0 ) ) {
492             return s[ 1 ];
493         }
494         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "." ) > 0 ) ) {
495             return s[ 0 ];
496         }
497         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "." ) > 0 ) ) {
498             return s[ 1 ];
499         }
500         else if ( s[ 0 ].indexOf( "_" ) > 0 ) {
501             return s[ 0 ];
502         }
503         else if ( s[ 1 ].indexOf( "_" ) > 0 ) {
504             return s[ 1 ];
505         }
506         else {
507             return s[ 0 ];
508         }
509     }
510
511     private static String processSimilarTo( final String name ) {
512         final int i = name.toLowerCase().indexOf( "similar to" );
513         String similar_to = "";
514         if ( i >= 0 ) {
515             similar_to = " similarity=" + name.substring( i + 10 ).trim();
516         }
517         final String pi = processNameIntelligently( name );
518         return pi + similar_to;
519     }
520
521     private static String sanitize( String s ) {
522         s = s.replace( ' ', '_' );
523         s = s.replace( '(', '{' );
524         s = s.replace( ')', '}' );
525         s = s.replace( '[', '{' );
526         s = s.replace( ']', '}' );
527         s = s.replace( ',', '_' );
528         return s;
529     }
530
531     public static enum FIELD {
532         NODE_NAME, SEQUENCE_ANNOTATION_DESC, DOMAIN_STRUCTURE, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, SEQUENCE_NAME;
533     }
534 }