acf6b393dae81de8e6b185235949086d804a1373
[jalview.git] / forester / java / src / org / forester / tools / PhylogenyDecorator.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.tools;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.HashMap;
31 import java.util.Map;
32 import java.util.regex.Matcher;
33 import java.util.regex.Pattern;
34
35 import org.forester.archaeopteryx.AptxUtil;
36 import org.forester.io.parsers.nhx.NHXFormatException;
37 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
38 import org.forester.phylogeny.Phylogeny;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.data.Accession;
41 import org.forester.phylogeny.data.Annotation;
42 import org.forester.phylogeny.data.DomainArchitecture;
43 import org.forester.phylogeny.data.Identifier;
44 import org.forester.phylogeny.data.Sequence;
45 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
46 import org.forester.util.BasicTable;
47 import org.forester.util.BasicTableParser;
48 import org.forester.util.ForesterUtil;
49
50 public final class PhylogenyDecorator {
51
52     // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
53     final private static String  TP_TAXONOMY_CODE                   = "TAXONOMY_CODE";
54     final private static String  TP_TAXONOMY_ID                     = "TAXONOMY_ID";
55     final private static String  TP_TAXONOMY_ID_PROVIDER            = "TAXONOMY_ID_PROVIDER";
56     final private static String  TP_TAXONOMY_SN                     = "TAXONOMY_SN";
57     final private static String  TP_TAXONOMY_CN                     = "TAXONOMY_CN";
58     final private static String  TP_TAXONOMY_SYN                    = "TAXONOMY_SYN";
59     final private static String  TP_SEQ_SYMBOL                      = "SEQ_SYMBOL";
60     final private static String  TP_SEQ_ACCESSION                   = "SEQ_ACCESSION";
61     final private static String  TP_SEQ_ACCESSION_SOURCE            = "SEQ_ACCESSION_SOURCE";
62     final private static String  TP_SEQ_ANNOTATION_DESC             = "SEQ_ANNOTATION_DESC";
63     final private static String  TP_SEQ_ANNOTATION_REF              = "SEQ_ANNOTATION_REF";
64     final private static String  TP_SEQ_MOL_SEQ                     = "SEQ_MOL_SEQ";
65     final private static String  TP_SEQ_NAME                        = "SEQ_NAME";
66     final private static String  TP_NODE_NAME                       = "NODE_NAME";
67     final private static Pattern NODENAME_SEQNUMBER_TAXDOMAINNUMBER = Pattern
68                                                                             .compile( "^([a-fA-Z0-9]{1,5})_([A-Z0-9]{2,4}[A-Z])(\\d{1,4})$" );
69     public final static boolean  SANITIZE                           = false;
70     public final static boolean  VERBOSE                            = true;
71     private static final boolean CUT                                = true;
72
    /**
     * Private constructor: this class only offers static methods and is not
     * meant to be instantiated.
     */
    private PhylogenyDecorator() {
        // Not needed.
    }
76
77     public static void decorate( final Phylogeny phylogeny,
78                                  final Map<String, Map<String, String>> map,
79                                  final boolean picky,
80                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
81             throws IllegalArgumentException, PhyloXmlDataFormatException {
82         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
83             final PhylogenyNode node = iter.next();
84             final String name = node.getName();
85             if ( !ForesterUtil.isEmpty( name ) ) {
86                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
87                     Map<String, String> new_values = map.get( name );
88                     int x = 0;
89                     while ( ( new_values == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
90                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
91                         new_values = map.get( name.substring( 0, name.length() - x ) );
92                         ++x;
93                     }
94                     if ( new_values != null ) {
95                         if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
96                             AptxUtil.ensurePresenceOfTaxonomy( node );
97                             node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
98                         }
99                         if ( new_values.containsKey( TP_TAXONOMY_ID )
100                                 && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
101                             AptxUtil.ensurePresenceOfTaxonomy( node );
102                             node.getNodeData()
103                                     .getTaxonomy()
104                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
105                                                                     new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
106                         }
107                         else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
108                             AptxUtil.ensurePresenceOfTaxonomy( node );
109                             node.getNodeData().getTaxonomy()
110                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
111                         }
112                         if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
113                             AptxUtil.ensurePresenceOfTaxonomy( node );
114                             node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
115                         }
116                         if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
117                             AptxUtil.ensurePresenceOfTaxonomy( node );
118                             node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
119                         }
120                         if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
121                             AptxUtil.ensurePresenceOfTaxonomy( node );
122                             node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
123                         }
124                         if ( new_values.containsKey( TP_SEQ_ACCESSION )
125                                 && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
126                             AptxUtil.ensurePresenceOfSequence( node );
127                             node.getNodeData()
128                                     .getSequence()
129                                     .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
130                                                                   new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
131                         }
132                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
133                             AptxUtil.ensurePresenceOfSequence( node );
134                             final Annotation ann = new Annotation( "?" );
135                             ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
136                             node.getNodeData().getSequence().addAnnotation( ann );
137                         }
138                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
139                             AptxUtil.ensurePresenceOfSequence( node );
140                             final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
141                             node.getNodeData().getSequence().addAnnotation( ann );
142                         }
143                         if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
144                             AptxUtil.ensurePresenceOfSequence( node );
145                             node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
146                         }
147                         if ( new_values.containsKey( TP_SEQ_NAME ) ) {
148                             AptxUtil.ensurePresenceOfSequence( node );
149                             node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
150                         }
151                         if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
152                             AptxUtil.ensurePresenceOfSequence( node );
153                             node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
154                         }
155                         if ( new_values.containsKey( TP_NODE_NAME ) ) {
156                             node.setName( new_values.get( TP_NODE_NAME ) );
157                         }
158                     } // if ( new_values != null ) 
159                 } // if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) )
160                 else if ( picky ) {
161                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
162                 }
163             }
164         }
165     }
166
167     /**
168      * 
169      * 
170      * 
171      * 
172      * 
173      * @param phylogeny
174      * @param map
175      *            maps names (in phylogeny) to new values
176      * @param field
177      * @param picky
178      * @throws IllegalArgumentException
179      * @throws NHXFormatException
180      * @throws PhyloXmlDataFormatException 
181      */
182     public static void decorate( final Phylogeny phylogeny,
183                                  final Map<String, String> map,
184                                  final FIELD field,
185                                  final boolean extract_bracketed_scientific_name,
186                                  final boolean picky,
187                                  final boolean cut_name_after_space,
188                                  final boolean process_name_intelligently,
189                                  final boolean process_similar_to,
190                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
191                                  final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
192             NHXFormatException, PhyloXmlDataFormatException {
193         PhylogenyDecorator.decorate( phylogeny,
194                                      map,
195                                      field,
196                                      extract_bracketed_scientific_name,
197                                      picky,
198                                      null,
199                                      cut_name_after_space,
200                                      process_name_intelligently,
201                                      process_similar_to,
202                                      numbers_of_chars_allowed_to_remove_if_not_found_in_map,
203                                      move_domain_numbers_at_end_to_middle );
204     }
205
206     /**
207      * 
208      * 
209      * 
210      * @param phylogeny
211      * @param map
212      *            maps names (in phylogeny) to new values if intermediate_map is
213      *            null otherwise maps intermediate value to new value
214      * @param field
215      * @param picky
216      * @param intermediate_map
217      *            maps name (in phylogeny) to a intermediate value
218      * @throws IllegalArgumentException
219      * @throws PhyloXmlDataFormatException 
220      */
221     public static void decorate( final Phylogeny phylogeny,
222                                  final Map<String, String> map,
223                                  final FIELD field,
224                                  final boolean extract_bracketed_scientific_name,
225                                  final boolean picky,
226                                  final Map<String, String> intermediate_map,
227                                  final boolean cut_name_after_space,
228                                  final boolean process_name_intelligently,
229                                  final boolean process_similar_to,
230                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
231                                  final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
232             PhyloXmlDataFormatException {
233         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
234             throw new IllegalArgumentException( "Attempt to extract bracketed scientific name together with data field pointing to scientific name" );
235         }
236         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
237             final PhylogenyNode node = iter.next();
238             String name = node.getName();
239             if ( !ForesterUtil.isEmpty( name ) ) {
240                 if ( intermediate_map != null ) {
241                     name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
242                 }
243                 // int space_index = name.indexOf( " " );
244                 //                if ( CUT && space_index > 0 ) {
245                 //                    int y = name.lastIndexOf( "|" );
246                 //                    name = name.substring( y + 1, space_index );
247                 //                }
248                 //                String new_value = null;
249                 //                for( String key : map.keySet() ) {
250                 //                    if ( key.indexOf( name ) >= 0 ) {
251                 //                        if ( new_value == null ) {
252                 //                            new_value = map.get( key );
253                 //                        }
254                 //                        else {
255                 //                            System.out.println( name + " is not unique" );
256                 //                            System.exit( -1 );
257                 //                        }
258                 //                    }
259                 //                }
260                 // if ( new_value != null ) {
261                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
262                     String new_value = map.get( name );
263                     int x = 0;
264                     while ( ( new_value == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
265                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
266                         new_value = map.get( name.substring( 0, name.length() - x ) );
267                         ++x;
268                     }
269                     if ( new_value != null ) {
270                         new_value = new_value.trim();
271                         new_value.replaceAll( "/\\s+/", " " );
272                         if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
273                             extractBracketedScientificNames( node, new_value );
274                         }
275                         switch ( field ) {
276                             case SEQUENCE_ANNOTATION_DESC:
277                                 if ( PhylogenyDecorator.VERBOSE ) {
278                                     System.out.println( name + ": " + new_value );
279                                 }
280                                 if ( !node.getNodeData().isHasSequence() ) {
281                                     node.getNodeData().setSequence( new Sequence() );
282                                 }
283                                 final Annotation annotation = new Annotation( "?" );
284                                 annotation.setDesc( new_value );
285                                 node.getNodeData().getSequence().addAnnotation( annotation );
286                                 break;
287                             case DOMAIN_STRUCTURE:
288                                 if ( PhylogenyDecorator.VERBOSE ) {
289                                     System.out.println( name + ": " + new_value );
290                                 }
291                                 if ( !node.getNodeData().isHasSequence() ) {
292                                     node.getNodeData().setSequence( new Sequence() );
293                                 }
294                                 node.getNodeData().getSequence()
295                                         .setDomainArchitecture( new DomainArchitecture( new_value ) );
296                                 break;
297                             case TAXONOMY_CODE:
298                                 if ( PhylogenyDecorator.VERBOSE ) {
299                                     System.out.println( name + ": " + new_value );
300                                 }
301                                 AptxUtil.ensurePresenceOfTaxonomy( node );
302                                 node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
303                                 break;
304                             case TAXONOMY_SCIENTIFIC_NAME:
305                                 if ( PhylogenyDecorator.VERBOSE ) {
306                                     System.out.println( name + ": " + new_value );
307                                 }
308                                 AptxUtil.ensurePresenceOfTaxonomy( node );
309                                 node.getNodeData().getTaxonomy().setScientificName( new_value );
310                                 break;
311                             case SEQUENCE_NAME:
312                                 if ( PhylogenyDecorator.VERBOSE ) {
313                                     System.out.println( name + ": " + new_value );
314                                 }
315                                 if ( !node.getNodeData().isHasSequence() ) {
316                                     node.getNodeData().setSequence( new Sequence() );
317                                 }
318                                 node.getNodeData().getSequence().setName( new_value );
319                                 break;
320                             case NODE_NAME:
321                                 if ( PhylogenyDecorator.VERBOSE ) {
322                                     System.out.print( name + " -> " );
323                                 }
324                                 if ( cut_name_after_space ) {
325                                     if ( PhylogenyDecorator.VERBOSE ) {
326                                         System.out.print( new_value + " -> " );
327                                     }
328                                     new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
329                                 }
330                                 else if ( process_name_intelligently ) {
331                                     if ( PhylogenyDecorator.VERBOSE ) {
332                                         System.out.print( new_value + " -> " );
333                                     }
334                                     new_value = PhylogenyDecorator.processNameIntelligently( new_value );
335                                 }
336                                 else if ( process_similar_to ) {
337                                     if ( PhylogenyDecorator.VERBOSE ) {
338                                         System.out.print( new_value + " -> " );
339                                     }
340                                     new_value = PhylogenyDecorator.processSimilarTo( new_value );
341                                 }
342                                 if ( PhylogenyDecorator.SANITIZE ) {
343                                     new_value = PhylogenyDecorator.sanitize( new_value );
344                                 }
345                                 if ( PhylogenyDecorator.VERBOSE ) {
346                                     System.out.println( new_value );
347                                 }
348                                 node.setName( new_value );
349                                 break;
350                             default:
351                                 throw new RuntimeException( "unknown field \"" + field + "\"" );
352                         }
353                         if ( move_domain_numbers_at_end_to_middle && ( field != FIELD.NODE_NAME ) ) {
354                             node.setName( moveDomainNumbersAtEnd( node.getName() ) );
355                         }
356                     }
357                 }
358                 else if ( picky ) {
359                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
360                 }
361             }
362         }
363     }
364
365     public static void decorate( final Phylogeny[] phylogenies,
366                                  final Map<String, Map<String, String>> map,
367                                  final boolean picky,
368                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
369             throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
370         for( int i = 0; i < phylogenies.length; ++i ) {
371             PhylogenyDecorator.decorate( phylogenies[ i ],
372                                          map,
373                                          picky,
374                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map );
375         }
376     }
377
378     public static void decorate( final Phylogeny[] phylogenies,
379                                  final Map<String, String> map,
380                                  final FIELD field,
381                                  final boolean extract_bracketed_scientific_name,
382                                  final boolean picky,
383                                  final boolean cut_name_after_space,
384                                  final boolean process_name_intelligently,
385                                  final boolean process_similar_to,
386                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
387                                  final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
388             NHXFormatException, PhyloXmlDataFormatException {
389         for( int i = 0; i < phylogenies.length; ++i ) {
390             PhylogenyDecorator.decorate( phylogenies[ i ],
391                                          map,
392                                          field,
393                                          extract_bracketed_scientific_name,
394                                          picky,
395                                          cut_name_after_space,
396                                          process_name_intelligently,
397                                          process_similar_to,
398                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
399                                          move_domain_numbers_at_end_to_middle );
400         }
401     }
402
403     public static void decorate( final Phylogeny[] phylogenies,
404                                  final Map<String, String> map,
405                                  final FIELD field,
406                                  final boolean extract_bracketed_scientific_name,
407                                  final boolean picky,
408                                  final Map<String, String> intermediate_map,
409                                  final boolean cut_name_after_space,
410                                  final boolean process_name_intelligently,
411                                  final boolean process_similar_to,
412                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
413                                  final boolean move_domain_numbers_at_end_to_middle ) throws IllegalArgumentException,
414             NHXFormatException, PhyloXmlDataFormatException {
415         for( int i = 0; i < phylogenies.length; ++i ) {
416             PhylogenyDecorator.decorate( phylogenies[ i ],
417                                          map,
418                                          field,
419                                          extract_bracketed_scientific_name,
420                                          picky,
421                                          intermediate_map,
422                                          cut_name_after_space,
423                                          process_name_intelligently,
424                                          process_similar_to,
425                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
426                                          move_domain_numbers_at_end_to_middle );
427         }
428     }
429
430     private static String deleteAtFirstSpace( final String name ) {
431         final int first_space = name.indexOf( " " );
432         if ( first_space > 1 ) {
433             return name.substring( 0, first_space ).trim();
434         }
435         return name;
436     }
437
438     private static void extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
439         final int i = new_value.lastIndexOf( "[" );
440         final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
441         AptxUtil.ensurePresenceOfTaxonomy( node );
442         node.getNodeData().getTaxonomy().setScientificName( scientific_name );
443     }
444
445     private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
446         String new_name = null;
447         if ( PhylogenyDecorator.VERBOSE ) {
448             System.out.print( name + " => " );
449         }
450         if ( intermediate_map.containsKey( name ) ) {
451             new_name = intermediate_map.get( name );
452             if ( ForesterUtil.isEmpty( new_name ) ) {
453                 throw new IllegalArgumentException( "\"" + name + "\" maps to null or empty string in secondary map" );
454             }
455         }
456         else {
457             throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" );
458         }
459         if ( PhylogenyDecorator.VERBOSE ) {
460             System.out.println( new_name + "  " );
461         }
462         return new_name;
463     }
464
465     private static String moveDomainNumbersAtEnd( final String node_name ) {
466         final Matcher m = NODENAME_SEQNUMBER_TAXDOMAINNUMBER.matcher( node_name );
467         if ( m.matches() ) {
468             final String seq_number = m.group( 1 );
469             final String tax = m.group( 2 );
470             final String domain_number = m.group( 3 );
471             return seq_number + "_[" + domain_number + "]_" + tax;
472         }
473         else {
474             return node_name;
475         }
476     }
477
478     public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
479             throws IOException {
480         final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
481         BasicTable<String> mapping_table = null;
482         mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false );
483         for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
484             final Map<String, String> row_map = new HashMap<String, String>();
485             String name = null;
486             for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
487                 final String table_cell = mapping_table.getValue( col, row );
488                 if ( col == 0 ) {
489                     name = table_cell;
490                 }
491                 else if ( table_cell != null ) {
492                     final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
493                     final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
494                     row_map.put( key, val );
495                 }
496             }
497             map.put( name, row_map );
498         }
499         return map;
500     }
501
502     private static String processNameIntelligently( final String name ) {
503         final String[] s = name.split( " " );
504         if ( s.length < 2 ) {
505             return name;
506         }
507         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "|" ) > 0 ) ) {
508             return s[ 0 ];
509         }
510         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "|" ) > 0 ) ) {
511             return s[ 1 ];
512         }
513         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "." ) > 0 ) ) {
514             return s[ 0 ];
515         }
516         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "." ) > 0 ) ) {
517             return s[ 1 ];
518         }
519         else if ( s[ 0 ].indexOf( "_" ) > 0 ) {
520             return s[ 0 ];
521         }
522         else if ( s[ 1 ].indexOf( "_" ) > 0 ) {
523             return s[ 1 ];
524         }
525         else {
526             return s[ 0 ];
527         }
528     }
529
530     private static String processSimilarTo( final String name ) {
531         final int i = name.toLowerCase().indexOf( "similar to" );
532         String similar_to = "";
533         if ( i >= 0 ) {
534             similar_to = " similarity=" + name.substring( i + 10 ).trim();
535         }
536         final String pi = processNameIntelligently( name );
537         return pi + similar_to;
538     }
539
540     private static String sanitize( String s ) {
541         s = s.replace( ' ', '_' );
542         s = s.replace( '(', '{' );
543         s = s.replace( ')', '}' );
544         s = s.replace( '[', '{' );
545         s = s.replace( ']', '}' );
546         s = s.replace( ',', '_' );
547         return s;
548     }
549
550     public static enum FIELD {
551         NODE_NAME, SEQUENCE_ANNOTATION_DESC, DOMAIN_STRUCTURE, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, SEQUENCE_NAME;
552     }
553 }