b6bc7051615510df0f8efb323c148e54ef67da9d
[jalview.git] / forester / java / src / org / forester / tools / PhylogenyDecorator.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.tools;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.HashMap;
31 import java.util.Map;
32
33 import org.forester.io.parsers.nhx.NHXFormatException;
34 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
35 import org.forester.phylogeny.Phylogeny;
36 import org.forester.phylogeny.PhylogenyNode;
37 import org.forester.phylogeny.data.Accession;
38 import org.forester.phylogeny.data.Annotation;
39 import org.forester.phylogeny.data.DomainArchitecture;
40 import org.forester.phylogeny.data.Identifier;
41 import org.forester.phylogeny.data.Sequence;
42 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
43 import org.forester.util.BasicTable;
44 import org.forester.util.BasicTableParser;
45 import org.forester.util.ForesterUtil;
46
47 public final class PhylogenyDecorator {
48
49     // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
50     final private static String TP_TAXONOMY_CODE        = "TAXONOMY_CODE";
51     final private static String TP_TAXONOMY_ID          = "TAXONOMY_ID";
52     final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
53     final private static String TP_TAXONOMY_SN          = "TAXONOMY_SN";
54     final private static String TP_TAXONOMY_CN          = "TAXONOMY_CN";
55     final private static String TP_TAXONOMY_SYN         = "TAXONOMY_SYN";
56     final private static String TP_SEQ_SYMBOL           = "SEQ_SYMBOL";
57     final private static String TP_SEQ_ACCESSION        = "SEQ_ACCESSION";
58     final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE";
59     final private static String TP_SEQ_ANNOTATION_DESC  = "SEQ_ANNOTATION_DESC";
60     final private static String TP_SEQ_ANNOTATION_REF   = "SEQ_ANNOTATION_REF";
61     final private static String TP_SEQ_MOL_SEQ          = "SEQ_MOL_SEQ";
62     final private static String TP_SEQ_NAME             = "SEQ_NAME";
63     final private static String TP_NODE_NAME            = "NODE_NAME";
64     public final static boolean SANITIZE                = false;
65     public final static boolean VERBOSE                 = true;
66
    /**
     * Private constructor: this final class only exposes static members, so
     * it is never instantiated.
     */
    private PhylogenyDecorator() {
        // Not needed.
    }
70
71     public static void decorate( final Phylogeny phylogeny,
72                                  final Map<String, Map<String, String>> map,
73                                  final boolean picky,
74                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
75             throws IllegalArgumentException, PhyloXmlDataFormatException {
76         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
77             final PhylogenyNode node = iter.next();
78             final String name = node.getName();
79             if ( !ForesterUtil.isEmpty( name ) ) {
80                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
81                     Map<String, String> new_values = map.get( name );
82                     int x = 0;
83                     while ( ( new_values == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
84                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
85                         new_values = map.get( name.substring( 0, name.length() - x ) );
86                         ++x;
87                     }
88                     if ( new_values != null ) {
89                         if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
90                             ForesterUtil.ensurePresenceOfTaxonomy( node );
91                             node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
92                         }
93                         if ( new_values.containsKey( TP_TAXONOMY_ID )
94                                 && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
95                             ForesterUtil.ensurePresenceOfTaxonomy( node );
96                             node.getNodeData()
97                                     .getTaxonomy()
98                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
99                                                                     new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
100                         }
101                         else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
102                             ForesterUtil.ensurePresenceOfTaxonomy( node );
103                             node.getNodeData().getTaxonomy()
104                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
105                         }
106                         if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
107                             ForesterUtil.ensurePresenceOfTaxonomy( node );
108                             node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
109                         }
110                         if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
111                             ForesterUtil.ensurePresenceOfTaxonomy( node );
112                             node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
113                         }
114                         if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
115                             ForesterUtil.ensurePresenceOfTaxonomy( node );
116                             node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
117                         }
118                         if ( new_values.containsKey( TP_SEQ_ACCESSION )
119                                 && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
120                             ForesterUtil.ensurePresenceOfSequence( node );
121                             node.getNodeData()
122                                     .getSequence()
123                                     .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
124                                                                   new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
125                         }
126                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
127                             ForesterUtil.ensurePresenceOfSequence( node );
128                             final Annotation ann = new Annotation();
129                             ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
130                             node.getNodeData().getSequence().addAnnotation( ann );
131                         }
132                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
133                             ForesterUtil.ensurePresenceOfSequence( node );
134                             final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
135                             node.getNodeData().getSequence().addAnnotation( ann );
136                         }
137                         if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
138                             ForesterUtil.ensurePresenceOfSequence( node );
139                             node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
140                         }
141                         if ( new_values.containsKey( TP_SEQ_NAME ) ) {
142                             ForesterUtil.ensurePresenceOfSequence( node );
143                             node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
144                         }
145                         if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
146                             ForesterUtil.ensurePresenceOfSequence( node );
147                             node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
148                         }
149                         if ( new_values.containsKey( TP_NODE_NAME ) ) {
150                             node.setName( new_values.get( TP_NODE_NAME ) );
151                         }
152                     } // if ( new_values != null ) 
153                 } // if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) )
154                 else if ( picky ) {
155                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
156                 }
157             }
158         }
159     }
160
161     public static void decorate( final Phylogeny phylogeny,
162                                  final Map<String, String> map,
163                                  final FIELD field,
164                                  final boolean extract_bracketed_scientific_name,
165                                  final boolean extract_bracketed_tax_code,
166                                  final boolean picky,
167                                  final boolean cut_name_after_space,
168                                  final boolean process_name_intelligently,
169                                  final boolean process_similar_to,
170                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
171                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
172             PhyloXmlDataFormatException {
173         PhylogenyDecorator.decorate( phylogeny,
174                                      map,
175                                      field,
176                                      extract_bracketed_scientific_name,
177                                      extract_bracketed_tax_code,
178                                      picky,
179                                      null,
180                                      cut_name_after_space,
181                                      process_name_intelligently,
182                                      process_similar_to,
183                                      numbers_of_chars_allowed_to_remove_if_not_found_in_map,
184                                      trim_after_tilde );
185     }
186
187     /**
188      * 
189      * 
190      * 
191      * @param phylogeny
192      * @param map
193      *            maps names (in phylogeny) to new values if intermediate_map is
194      *            null otherwise maps intermediate value to new value
195      * @param field
196      * @param picky
197      * @param intermediate_map
198      *            maps name (in phylogeny) to a intermediate value
199      * @throws IllegalArgumentException
200      * @throws PhyloXmlDataFormatException 
201      */
202     public static void decorate( final Phylogeny phylogeny,
203                                  final Map<String, String> map,
204                                  final FIELD field,
205                                  final boolean extract_bracketed_scientific_name,
206                                  final boolean extract_bracketed_tax_code,
207                                  final boolean picky,
208                                  final Map<String, String> intermediate_map,
209                                  final boolean cut_name_after_space,
210                                  final boolean process_name_intelligently,
211                                  final boolean process_similar_to,
212                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
213                                  final boolean trim_after_tilde ) throws IllegalArgumentException,
214             PhyloXmlDataFormatException {
215         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
216             throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
217         }
218         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
219             final PhylogenyNode node = iter.next();
220             String name = node.getName();
221             String tilde_annotation = null;
222             if ( trim_after_tilde && ( name.indexOf( '~' ) > 0 ) ) {
223                 final int ti = name.indexOf( '~' );
224                 tilde_annotation = name.substring( ti );
225                 name = name.substring( 0, ti );
226             }
227             if ( !ForesterUtil.isEmpty( name ) ) {
228                 if ( intermediate_map != null ) {
229                     name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
230                 }
231                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
232                     String new_value = map.get( name );
233                     int x = 0;
234                     while ( ( new_value == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
235                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
236                         new_value = map.get( name.substring( 0, name.length() - x ) );
237                         ++x;
238                     }
239                     if ( new_value != null ) {
240                         new_value = new_value.trim();
241                         new_value.replaceAll( "/\\s+/", " " );
242                         if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
243                             new_value = extractBracketedScientificNames( node, new_value );
244                         }
245                         else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) {
246                             new_value = extractBracketedTaxCodes( node, new_value );
247                         }
248                         switch ( field ) {
249                             case SEQUENCE_ANNOTATION_DESC:
250                                 if ( PhylogenyDecorator.VERBOSE ) {
251                                     System.out.println( name + ": " + new_value );
252                                 }
253                                 if ( !node.getNodeData().isHasSequence() ) {
254                                     node.getNodeData().setSequence( new Sequence() );
255                                 }
256                                 final Annotation annotation = new Annotation( "?" );
257                                 annotation.setDesc( new_value );
258                                 node.getNodeData().getSequence().addAnnotation( annotation );
259                                 break;
260                             case DOMAIN_STRUCTURE:
261                                 if ( PhylogenyDecorator.VERBOSE ) {
262                                     System.out.println( name + ": " + new_value );
263                                 }
264                                 if ( !node.getNodeData().isHasSequence() ) {
265                                     node.getNodeData().setSequence( new Sequence() );
266                                 }
267                                 node.getNodeData().getSequence()
268                                         .setDomainArchitecture( new DomainArchitecture( new_value ) );
269                                 break;
270                             case TAXONOMY_CODE:
271                                 if ( PhylogenyDecorator.VERBOSE ) {
272                                     System.out.println( name + ": " + new_value );
273                                 }
274                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
275                                 node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
276                                 break;
277                             case TAXONOMY_SCIENTIFIC_NAME:
278                                 if ( PhylogenyDecorator.VERBOSE ) {
279                                     System.out.println( name + ": " + new_value );
280                                 }
281                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
282                                 node.getNodeData().getTaxonomy().setScientificName( new_value );
283                                 break;
284                             case SEQUENCE_NAME:
285                                 if ( trim_after_tilde ) {
286                                     new_value = addTildeAnnotation( tilde_annotation, new_value );
287                                 }
288                                 if ( PhylogenyDecorator.VERBOSE ) {
289                                     System.out.println( name + ": " + new_value );
290                                 }
291                                 if ( !node.getNodeData().isHasSequence() ) {
292                                     node.getNodeData().setSequence( new Sequence() );
293                                 }
294                                 node.getNodeData().getSequence().setName( new_value );
295                                 break;
296                             case NODE_NAME:
297                                 if ( PhylogenyDecorator.VERBOSE ) {
298                                     System.out.print( name + " -> " );
299                                 }
300                                 if ( cut_name_after_space ) {
301                                     if ( PhylogenyDecorator.VERBOSE ) {
302                                         System.out.print( new_value + " -> " );
303                                     }
304                                     new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
305                                 }
306                                 else if ( process_name_intelligently ) {
307                                     if ( PhylogenyDecorator.VERBOSE ) {
308                                         System.out.print( new_value + " -> " );
309                                     }
310                                     new_value = PhylogenyDecorator.processNameIntelligently( new_value );
311                                 }
312                                 else if ( process_similar_to ) {
313                                     if ( PhylogenyDecorator.VERBOSE ) {
314                                         System.out.print( new_value + " -> " );
315                                     }
316                                     new_value = PhylogenyDecorator.processSimilarTo( new_value );
317                                 }
318                                 if ( PhylogenyDecorator.SANITIZE ) {
319                                     new_value = PhylogenyDecorator.sanitize( new_value );
320                                 }
321                                 if ( trim_after_tilde ) {
322                                     new_value = addTildeAnnotation( tilde_annotation, new_value );
323                                 }
324                                 if ( PhylogenyDecorator.VERBOSE ) {
325                                     System.out.println( new_value );
326                                 }
327                                 node.setName( new_value );
328                                 break;
329                             default:
330                                 throw new RuntimeException( "unknown field \"" + field + "\"" );
331                         }
332                     }
333                 }
334                 else if ( picky ) {
335                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
336                 }
337             }
338         }
339     }
340
341     private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) {
342         if ( ForesterUtil.isEmpty( tilde_annotation ) ) {
343             return new_value;
344         }
345         return new_value + tilde_annotation;
346     }
347
348     public static void decorate( final Phylogeny[] phylogenies,
349                                  final Map<String, Map<String, String>> map,
350                                  final boolean picky,
351                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
352             throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
353         for( final Phylogeny phylogenie : phylogenies ) {
354             PhylogenyDecorator
355                     .decorate( phylogenie, map, picky, numbers_of_chars_allowed_to_remove_if_not_found_in_map );
356         }
357     }
358
359     public static void decorate( final Phylogeny[] phylogenies,
360                                  final Map<String, String> map,
361                                  final FIELD field,
362                                  final boolean extract_bracketed_scientific_name,
363                                  final boolean extract_bracketed_tax_code,
364                                  final boolean picky,
365                                  final boolean cut_name_after_space,
366                                  final boolean process_name_intelligently,
367                                  final boolean process_similar_to,
368                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
369                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
370             PhyloXmlDataFormatException {
371         for( final Phylogeny phylogenie : phylogenies ) {
372             PhylogenyDecorator.decorate( phylogenie,
373                                          map,
374                                          field,
375                                          extract_bracketed_scientific_name,
376                                          extract_bracketed_tax_code,
377                                          picky,
378                                          cut_name_after_space,
379                                          process_name_intelligently,
380                                          process_similar_to,
381                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
382                                          trim_after_tilde );
383         }
384     }
385
386     public static void decorate( final Phylogeny[] phylogenies,
387                                  final Map<String, String> map,
388                                  final FIELD field,
389                                  final boolean extract_bracketed_scientific_name,
390                                  final boolean extract_bracketed_tax_code,
391                                  final boolean picky,
392                                  final Map<String, String> intermediate_map,
393                                  final boolean cut_name_after_space,
394                                  final boolean process_name_intelligently,
395                                  final boolean process_similar_to,
396                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
397                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
398             PhyloXmlDataFormatException {
399         for( final Phylogeny phylogenie : phylogenies ) {
400             PhylogenyDecorator.decorate( phylogenie,
401                                          map,
402                                          field,
403                                          extract_bracketed_scientific_name,
404                                          extract_bracketed_tax_code,
405                                          picky,
406                                          intermediate_map,
407                                          cut_name_after_space,
408                                          process_name_intelligently,
409                                          process_similar_to,
410                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
411                                          trim_after_tilde );
412         }
413     }
414
415     public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
416             throws IOException {
417         final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
418         BasicTable<String> mapping_table = null;
419         mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false );
420         for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
421             final Map<String, String> row_map = new HashMap<String, String>();
422             String name = null;
423             for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
424                 final String table_cell = mapping_table.getValue( col, row );
425                 if ( col == 0 ) {
426                     name = table_cell;
427                 }
428                 else if ( table_cell != null ) {
429                     final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
430                     final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
431                     row_map.put( key, val );
432                 }
433             }
434             map.put( name, row_map );
435         }
436         return map;
437     }
438
439     private static String deleteAtFirstSpace( final String name ) {
440         final int first_space = name.indexOf( " " );
441         if ( first_space > 1 ) {
442             return name.substring( 0, first_space ).trim();
443         }
444         return name;
445     }
446
447     private static String extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
448         final int i = new_value.lastIndexOf( "[" );
449         final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
450         ForesterUtil.ensurePresenceOfTaxonomy( node );
451         node.getNodeData().getTaxonomy().setScientificName( scientific_name );
452         return new_value.substring( 0, i - 1 ).trim();
453     }
454
455     private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
456         final int i = new_value.lastIndexOf( "[" );
457         String tc = new_value.substring( i + 1, new_value.length() - 1 );
458         if ( tc.length() == 6 ) {
459             tc = tc.substring( 0, 5 );
460         }
461         ForesterUtil.ensurePresenceOfTaxonomy( node );
462         try {
463             node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
464         }
465         catch ( final PhyloXmlDataFormatException e ) {
466             throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
467         }
468         return new_value.substring( 0, i - 1 ).trim();
469     }
470
471     private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
472         String new_name = null;
473         if ( PhylogenyDecorator.VERBOSE ) {
474             System.out.print( name + " => " );
475         }
476         if ( intermediate_map.containsKey( name ) ) {
477             new_name = intermediate_map.get( name );
478             if ( ForesterUtil.isEmpty( new_name ) ) {
479                 throw new IllegalArgumentException( "\"" + name + "\" maps to null or empty string in secondary map" );
480             }
481         }
482         else {
483             throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" );
484         }
485         if ( PhylogenyDecorator.VERBOSE ) {
486             System.out.println( new_name + "  " );
487         }
488         return new_name;
489     }
490
491     private static String processNameIntelligently( final String name ) {
492         final String[] s = name.split( " " );
493         if ( s.length < 2 ) {
494             return name;
495         }
496         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "|" ) > 0 ) ) {
497             return s[ 0 ];
498         }
499         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "|" ) > 0 ) ) {
500             return s[ 1 ];
501         }
502         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "." ) > 0 ) ) {
503             return s[ 0 ];
504         }
505         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "." ) > 0 ) ) {
506             return s[ 1 ];
507         }
508         else if ( s[ 0 ].indexOf( "_" ) > 0 ) {
509             return s[ 0 ];
510         }
511         else if ( s[ 1 ].indexOf( "_" ) > 0 ) {
512             return s[ 1 ];
513         }
514         else {
515             return s[ 0 ];
516         }
517     }
518
519     private static String processSimilarTo( final String name ) {
520         final int i = name.toLowerCase().indexOf( "similar to" );
521         String similar_to = "";
522         if ( i >= 0 ) {
523             similar_to = " similarity=" + name.substring( i + 10 ).trim();
524         }
525         final String pi = processNameIntelligently( name );
526         return pi + similar_to;
527     }
528
529     private static String sanitize( String s ) {
530         s = s.replace( ' ', '_' );
531         s = s.replace( '(', '{' );
532         s = s.replace( ')', '}' );
533         s = s.replace( '[', '{' );
534         s = s.replace( ']', '}' );
535         s = s.replace( ',', '_' );
536         return s;
537     }
538
    /**
     * The node data field set by the single value decorate methods:
     * NODE_NAME replaces the node name, SEQUENCE_ANNOTATION_DESC adds a
     * sequence annotation with the value as description, DOMAIN_STRUCTURE
     * sets the domain architecture of the sequence, TAXONOMY_CODE and
     * TAXONOMY_SCIENTIFIC_NAME set the corresponding taxonomy data, and
     * SEQUENCE_NAME sets the name of the sequence.
     */
    public static enum FIELD {
        NODE_NAME, SEQUENCE_ANNOTATION_DESC, DOMAIN_STRUCTURE, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, SEQUENCE_NAME;
    }
542 }