error things
[jalview.git] / forester / java / src / org / forester / tools / PhylogenyDecorator.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.tools;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.HashMap;
31 import java.util.Map;
32 import java.util.regex.Pattern;
33
34 import org.forester.io.parsers.nhx.NHXFormatException;
35 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
36 import org.forester.phylogeny.Phylogeny;
37 import org.forester.phylogeny.PhylogenyNode;
38 import org.forester.phylogeny.data.Accession;
39 import org.forester.phylogeny.data.Annotation;
40 import org.forester.phylogeny.data.DomainArchitecture;
41 import org.forester.phylogeny.data.Identifier;
42 import org.forester.phylogeny.data.Sequence;
43 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
44 import org.forester.util.BasicTable;
45 import org.forester.util.BasicTableParser;
46 import org.forester.util.ForesterUtil;
47
48 public final class PhylogenyDecorator {
49
50     // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
51     final private static String  TP_TAXONOMY_CODE                   = "TAXONOMY_CODE";
52     final private static String  TP_TAXONOMY_ID                     = "TAXONOMY_ID";
53     final private static String  TP_TAXONOMY_ID_PROVIDER            = "TAXONOMY_ID_PROVIDER";
54     final private static String  TP_TAXONOMY_SN                     = "TAXONOMY_SN";
55     final private static String  TP_TAXONOMY_CN                     = "TAXONOMY_CN";
56     final private static String  TP_TAXONOMY_SYN                    = "TAXONOMY_SYN";
57     final private static String  TP_SEQ_SYMBOL                      = "SEQ_SYMBOL";
58     final private static String  TP_SEQ_ACCESSION                   = "SEQ_ACCESSION";
59     final private static String  TP_SEQ_ACCESSION_SOURCE            = "SEQ_ACCESSION_SOURCE";
60     final private static String  TP_SEQ_ANNOTATION_DESC             = "SEQ_ANNOTATION_DESC";
61     final private static String  TP_SEQ_ANNOTATION_REF              = "SEQ_ANNOTATION_REF";
62     final private static String  TP_SEQ_MOL_SEQ                     = "SEQ_MOL_SEQ";
63     final private static String  TP_SEQ_NAME                        = "SEQ_NAME";
64     final private static String  TP_NODE_NAME                       = "NODE_NAME";
65     final private static Pattern NODENAME_SEQNUMBER_TAXDOMAINNUMBER = Pattern
66                                                                             .compile( "^([a-fA-Z0-9]{1,5})_([A-Z0-9]{2,4}[A-Z])(\\d{1,4})$" );
67     public final static boolean  SANITIZE                           = false;
68     public final static boolean  VERBOSE                            = true;
69     private static final boolean CUT                                = true;
70
71     private PhylogenyDecorator() {
72         // Not needed.
73     }
74
75     public static void decorate( final Phylogeny phylogeny,
76                                  final Map<String, Map<String, String>> map,
77                                  final boolean picky,
78                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
79             throws IllegalArgumentException, PhyloXmlDataFormatException {
80         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
81             final PhylogenyNode node = iter.next();
82             final String name = node.getName();
83             if ( !ForesterUtil.isEmpty( name ) ) {
84                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
85                     Map<String, String> new_values = map.get( name );
86                     int x = 0;
87                     while ( ( new_values == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
88                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
89                         new_values = map.get( name.substring( 0, name.length() - x ) );
90                         ++x;
91                     }
92                     if ( new_values != null ) {
93                         if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
94                             ForesterUtil.ensurePresenceOfTaxonomy( node );
95                             node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
96                         }
97                         if ( new_values.containsKey( TP_TAXONOMY_ID )
98                                 && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
99                             ForesterUtil.ensurePresenceOfTaxonomy( node );
100                             node.getNodeData()
101                                     .getTaxonomy()
102                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
103                                                                     new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
104                         }
105                         else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
106                             ForesterUtil.ensurePresenceOfTaxonomy( node );
107                             node.getNodeData().getTaxonomy()
108                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
109                         }
110                         if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
111                             ForesterUtil.ensurePresenceOfTaxonomy( node );
112                             node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
113                         }
114                         if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
115                             ForesterUtil.ensurePresenceOfTaxonomy( node );
116                             node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
117                         }
118                         if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
119                             ForesterUtil.ensurePresenceOfTaxonomy( node );
120                             node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
121                         }
122                         if ( new_values.containsKey( TP_SEQ_ACCESSION )
123                                 && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
124                             ForesterUtil.ensurePresenceOfSequence( node );
125                             node.getNodeData()
126                                     .getSequence()
127                                     .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
128                                                                   new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
129                         }
130                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
131                             ForesterUtil.ensurePresenceOfSequence( node );
132                             final Annotation ann = new Annotation();
133                             ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
134                             node.getNodeData().getSequence().addAnnotation( ann );
135                         }
136                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
137                             ForesterUtil.ensurePresenceOfSequence( node );
138                             final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
139                             node.getNodeData().getSequence().addAnnotation( ann );
140                         }
141                         if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
142                             ForesterUtil.ensurePresenceOfSequence( node );
143                             node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
144                         }
145                         if ( new_values.containsKey( TP_SEQ_NAME ) ) {
146                             ForesterUtil.ensurePresenceOfSequence( node );
147                             node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
148                         }
149                         if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
150                             ForesterUtil.ensurePresenceOfSequence( node );
151                             node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
152                         }
153                         if ( new_values.containsKey( TP_NODE_NAME ) ) {
154                             node.setName( new_values.get( TP_NODE_NAME ) );
155                         }
156                     } // if ( new_values != null ) 
157                 } // if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) )
158                 else if ( picky ) {
159                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
160                 }
161             }
162         }
163     }
164
165     /**
166      * 
167      * 
168      * 
169      * 
170      * 
171      * @param phylogeny
172      * @param map
173      *            maps names (in phylogeny) to new values
174      * @param field
175      * @param picky
176      * @throws IllegalArgumentException
177      * @throws NHXFormatException
178      * @throws PhyloXmlDataFormatException 
179      */
180     public static void decorate( final Phylogeny phylogeny,
181                                  final Map<String, String> map,
182                                  final FIELD field,
183                                  final boolean extract_bracketed_scientific_name,
184                                  final boolean extract_bracketed_tax_code,
185                                  final boolean picky,
186                                  final boolean cut_name_after_space,
187                                  final boolean process_name_intelligently,
188                                  final boolean process_similar_to,
189                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
190                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
191             PhyloXmlDataFormatException {
192         PhylogenyDecorator.decorate( phylogeny,
193                                      map,
194                                      field,
195                                      extract_bracketed_scientific_name,
196                                      extract_bracketed_tax_code,
197                                      picky,
198                                      null,
199                                      cut_name_after_space,
200                                      process_name_intelligently,
201                                      process_similar_to,
202                                      numbers_of_chars_allowed_to_remove_if_not_found_in_map,
203                                      trim_after_tilde );
204     }
205
206     /**
207      * 
208      * 
209      * 
210      * @param phylogeny
211      * @param map
212      *            maps names (in phylogeny) to new values if intermediate_map is
213      *            null otherwise maps intermediate value to new value
214      * @param field
215      * @param picky
216      * @param intermediate_map
217      *            maps name (in phylogeny) to a intermediate value
218      * @throws IllegalArgumentException
219      * @throws PhyloXmlDataFormatException 
220      */
221     public static void decorate( final Phylogeny phylogeny,
222                                  final Map<String, String> map,
223                                  final FIELD field,
224                                  final boolean extract_bracketed_scientific_name,
225                                  final boolean extract_bracketed_tax_code,
226                                  final boolean picky,
227                                  final Map<String, String> intermediate_map,
228                                  final boolean cut_name_after_space,
229                                  final boolean process_name_intelligently,
230                                  final boolean process_similar_to,
231                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
232                                  final boolean trim_after_tilde ) throws IllegalArgumentException,
233             PhyloXmlDataFormatException {
234         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
235             throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
236         }
237         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
238             final PhylogenyNode node = iter.next();
239             String name = node.getName();
240             if ( trim_after_tilde && ( name.indexOf( '~' ) > 0 ) ) {
241                 name = name.substring( 0, name.indexOf( '~' ) );
242             }
243             if ( !ForesterUtil.isEmpty( name ) ) {
244                 if ( intermediate_map != null ) {
245                     name = PhylogenyDecorator.extractIntermediate( intermediate_map, name );
246                 }
247                 // int space_index = name.indexOf( " " );
248                 //                if ( CUT && space_index > 0 ) {
249                 //                    int y = name.lastIndexOf( "|" );
250                 //                    name = name.substring( y + 1, space_index );
251                 //                }
252                 //                String new_value = null;
253                 //                for( String key : map.keySet() ) {
254                 //                    if ( key.indexOf( name ) >= 0 ) {
255                 //                        if ( new_value == null ) {
256                 //                            new_value = map.get( key );
257                 //                        }
258                 //                        else {
259                 //                            System.out.println( name + " is not unique" );
260                 //                            System.exit( -1 );
261                 //                        }
262                 //                    }
263                 //                }
264                 // if ( new_value != null ) {
265                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
266                     String new_value = map.get( name );
267                     int x = 0;
268                     while ( ( new_value == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
269                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
270                         new_value = map.get( name.substring( 0, name.length() - x ) );
271                         ++x;
272                     }
273                     if ( new_value != null ) {
274                         new_value = new_value.trim();
275                         new_value.replaceAll( "/\\s+/", " " );
276                         if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
277                             new_value = extractBracketedScientificNames( node, new_value );
278                         }
279                         else if ( extract_bracketed_tax_code && new_value.endsWith( "]" ) ) {
280                             new_value = extractBracketedTaxCodes( node, new_value );
281                         }
282                         switch ( field ) {
283                             case SEQUENCE_ANNOTATION_DESC:
284                                 if ( PhylogenyDecorator.VERBOSE ) {
285                                     System.out.println( name + ": " + new_value );
286                                 }
287                                 if ( !node.getNodeData().isHasSequence() ) {
288                                     node.getNodeData().setSequence( new Sequence() );
289                                 }
290                                 final Annotation annotation = new Annotation( "?" );
291                                 annotation.setDesc( new_value );
292                                 node.getNodeData().getSequence().addAnnotation( annotation );
293                                 break;
294                             case DOMAIN_STRUCTURE:
295                                 if ( PhylogenyDecorator.VERBOSE ) {
296                                     System.out.println( name + ": " + new_value );
297                                 }
298                                 if ( !node.getNodeData().isHasSequence() ) {
299                                     node.getNodeData().setSequence( new Sequence() );
300                                 }
301                                 node.getNodeData().getSequence()
302                                         .setDomainArchitecture( new DomainArchitecture( new_value ) );
303                                 break;
304                             case TAXONOMY_CODE:
305                                 if ( PhylogenyDecorator.VERBOSE ) {
306                                     System.out.println( name + ": " + new_value );
307                                 }
308                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
309                                 node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
310                                 break;
311                             case TAXONOMY_SCIENTIFIC_NAME:
312                                 if ( PhylogenyDecorator.VERBOSE ) {
313                                     System.out.println( name + ": " + new_value );
314                                 }
315                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
316                                 node.getNodeData().getTaxonomy().setScientificName( new_value );
317                                 break;
318                             case SEQUENCE_NAME:
319                                 if ( PhylogenyDecorator.VERBOSE ) {
320                                     System.out.println( name + ": " + new_value );
321                                 }
322                                 if ( !node.getNodeData().isHasSequence() ) {
323                                     node.getNodeData().setSequence( new Sequence() );
324                                 }
325                                 node.getNodeData().getSequence().setName( new_value );
326                                 break;
327                             case NODE_NAME:
328                                 if ( PhylogenyDecorator.VERBOSE ) {
329                                     System.out.print( name + " -> " );
330                                 }
331                                 if ( cut_name_after_space ) {
332                                     if ( PhylogenyDecorator.VERBOSE ) {
333                                         System.out.print( new_value + " -> " );
334                                     }
335                                     new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
336                                 }
337                                 else if ( process_name_intelligently ) {
338                                     if ( PhylogenyDecorator.VERBOSE ) {
339                                         System.out.print( new_value + " -> " );
340                                     }
341                                     new_value = PhylogenyDecorator.processNameIntelligently( new_value );
342                                 }
343                                 else if ( process_similar_to ) {
344                                     if ( PhylogenyDecorator.VERBOSE ) {
345                                         System.out.print( new_value + " -> " );
346                                     }
347                                     new_value = PhylogenyDecorator.processSimilarTo( new_value );
348                                 }
349                                 if ( PhylogenyDecorator.SANITIZE ) {
350                                     new_value = PhylogenyDecorator.sanitize( new_value );
351                                 }
352                                 if ( PhylogenyDecorator.VERBOSE ) {
353                                     System.out.println( new_value );
354                                 }
355                                 node.setName( new_value );
356                                 break;
357                             default:
358                                 throw new RuntimeException( "unknown field \"" + field + "\"" );
359                         }
360                     }
361                 }
362                 else if ( picky ) {
363                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
364                 }
365             }
366         }
367     }
368
369     public static void decorate( final Phylogeny[] phylogenies,
370                                  final Map<String, Map<String, String>> map,
371                                  final boolean picky,
372                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
373             throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
374         for( final Phylogeny phylogenie : phylogenies ) {
375             PhylogenyDecorator
376                     .decorate( phylogenie, map, picky, numbers_of_chars_allowed_to_remove_if_not_found_in_map );
377         }
378     }
379
380     public static void decorate( final Phylogeny[] phylogenies,
381                                  final Map<String, String> map,
382                                  final FIELD field,
383                                  final boolean extract_bracketed_scientific_name,
384                                  final boolean extract_bracketed_tax_code,
385                                  final boolean picky,
386                                  final boolean cut_name_after_space,
387                                  final boolean process_name_intelligently,
388                                  final boolean process_similar_to,
389                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
390                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
391             PhyloXmlDataFormatException {
392         for( final Phylogeny phylogenie : phylogenies ) {
393             PhylogenyDecorator.decorate( phylogenie,
394                                          map,
395                                          field,
396                                          extract_bracketed_scientific_name,
397                                          extract_bracketed_tax_code,
398                                          picky,
399                                          cut_name_after_space,
400                                          process_name_intelligently,
401                                          process_similar_to,
402                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
403                                          trim_after_tilde );
404         }
405     }
406
407     public static void decorate( final Phylogeny[] phylogenies,
408                                  final Map<String, String> map,
409                                  final FIELD field,
410                                  final boolean extract_bracketed_scientific_name,
411                                  final boolean extract_bracketed_tax_code,
412                                  final boolean picky,
413                                  final Map<String, String> intermediate_map,
414                                  final boolean cut_name_after_space,
415                                  final boolean process_name_intelligently,
416                                  final boolean process_similar_to,
417                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
418                                  final boolean trim_after_tilde ) throws IllegalArgumentException, NHXFormatException,
419             PhyloXmlDataFormatException {
420         for( final Phylogeny phylogenie : phylogenies ) {
421             PhylogenyDecorator.decorate( phylogenie,
422                                          map,
423                                          field,
424                                          extract_bracketed_scientific_name,
425                                          extract_bracketed_tax_code,
426                                          picky,
427                                          intermediate_map,
428                                          cut_name_after_space,
429                                          process_name_intelligently,
430                                          process_similar_to,
431                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
432                                          trim_after_tilde );
433         }
434     }
435
436     private static String deleteAtFirstSpace( final String name ) {
437         final int first_space = name.indexOf( " " );
438         if ( first_space > 1 ) {
439             return name.substring( 0, first_space ).trim();
440         }
441         return name;
442     }
443
444     private static String extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
445         final int i = new_value.lastIndexOf( "[" );
446         final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
447         ForesterUtil.ensurePresenceOfTaxonomy( node );
448         node.getNodeData().getTaxonomy().setScientificName( scientific_name );
449         return new_value.substring( 0, i - 1 ).trim();
450     }
451
452     private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
453         final int i = new_value.lastIndexOf( "[" );
454         final String tc = new_value.substring( i + 1, new_value.length() - 1 );
455         ForesterUtil.ensurePresenceOfTaxonomy( node );
456         try {
457             node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
458         }
459         catch ( final PhyloXmlDataFormatException e ) {
460             throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
461         }
462         return new_value.substring( 0, i - 1 ).trim();
463     }
464
465     private static String extractIntermediate( final Map<String, String> intermediate_map, final String name ) {
466         String new_name = null;
467         if ( PhylogenyDecorator.VERBOSE ) {
468             System.out.print( name + " => " );
469         }
470         if ( intermediate_map.containsKey( name ) ) {
471             new_name = intermediate_map.get( name );
472             if ( ForesterUtil.isEmpty( new_name ) ) {
473                 throw new IllegalArgumentException( "\"" + name + "\" maps to null or empty string in secondary map" );
474             }
475         }
476         else {
477             throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" );
478         }
479         if ( PhylogenyDecorator.VERBOSE ) {
480             System.out.println( new_name + "  " );
481         }
482         return new_name;
483     }
484
485     public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
486             throws IOException {
487         final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
488         BasicTable<String> mapping_table = null;
489         mapping_table = BasicTableParser.parse( mapping_table_file, "\t", false, false );
490         for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
491             final Map<String, String> row_map = new HashMap<String, String>();
492             String name = null;
493             for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
494                 final String table_cell = mapping_table.getValue( col, row );
495                 if ( col == 0 ) {
496                     name = table_cell;
497                 }
498                 else if ( table_cell != null ) {
499                     final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
500                     final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
501                     row_map.put( key, val );
502                 }
503             }
504             map.put( name, row_map );
505         }
506         return map;
507     }
508
509     private static String processNameIntelligently( final String name ) {
510         final String[] s = name.split( " " );
511         if ( s.length < 2 ) {
512             return name;
513         }
514         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "|" ) > 0 ) ) {
515             return s[ 0 ];
516         }
517         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "|" ) > 0 ) ) {
518             return s[ 1 ];
519         }
520         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "." ) > 0 ) ) {
521             return s[ 0 ];
522         }
523         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "." ) > 0 ) ) {
524             return s[ 1 ];
525         }
526         else if ( s[ 0 ].indexOf( "_" ) > 0 ) {
527             return s[ 0 ];
528         }
529         else if ( s[ 1 ].indexOf( "_" ) > 0 ) {
530             return s[ 1 ];
531         }
532         else {
533             return s[ 0 ];
534         }
535     }
536
537     private static String processSimilarTo( final String name ) {
538         final int i = name.toLowerCase().indexOf( "similar to" );
539         String similar_to = "";
540         if ( i >= 0 ) {
541             similar_to = " similarity=" + name.substring( i + 10 ).trim();
542         }
543         final String pi = processNameIntelligently( name );
544         return pi + similar_to;
545     }
546
547     private static String sanitize( String s ) {
548         s = s.replace( ' ', '_' );
549         s = s.replace( '(', '{' );
550         s = s.replace( ')', '}' );
551         s = s.replace( '[', '{' );
552         s = s.replace( ']', '}' );
553         s = s.replace( ',', '_' );
554         return s;
555     }
556
557     public static enum FIELD {
558         NODE_NAME, SEQUENCE_ANNOTATION_DESC, DOMAIN_STRUCTURE, TAXONOMY_CODE, TAXONOMY_SCIENTIFIC_NAME, SEQUENCE_NAME;
559     }
560 }