27a5b03327b0fd6f5243e2078b365223b252c161
[jalview.git] / forester / java / src / org / forester / tools / PhylogenyDecorator.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
25
26 package org.forester.tools;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.HashMap;
31 import java.util.Map;
32 import java.util.regex.Matcher;
33
34 import org.forester.io.parsers.nhx.NHXFormatException;
35 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
36 import org.forester.io.parsers.util.ParserUtils;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyNode;
39 import org.forester.phylogeny.data.Accession;
40 import org.forester.phylogeny.data.Annotation;
41 import org.forester.phylogeny.data.DomainArchitecture;
42 import org.forester.phylogeny.data.Identifier;
43 import org.forester.phylogeny.data.Sequence;
44 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
45 import org.forester.util.BasicTable;
46 import org.forester.util.BasicTableParser;
47 import org.forester.util.ForesterUtil;
48
49 public final class PhylogenyDecorator {
50
    /** When true, new node names are passed through {@code sanitize} before being set. */
    public final static boolean SANITIZE                = false;
    // Keys recognized in the per-node value maps consumed by
    // decorate( Phylogeny, Map<String, Map<String, String>>, boolean, int ).
    /** Key: replacement for the node name itself. */
    final private static String TP_NODE_NAME            = "NODE_NAME";
    /** Key: sequence accession value; only applied when TP_SEQ_ACCESSION_SOURCE is present too. */
    final private static String TP_SEQ_ACCESSION        = "SEQ_ACCESSION";
    /** Key: source of the sequence accession. */
    final private static String TP_SEQ_ACCESSION_SOURCE = "SEQ_ACCESSION_SOURCE";
    /** Key: description of a sequence annotation that is added to the node's sequence. */
    final private static String TP_SEQ_ANNOTATION_DESC  = "SEQ_ANNOTATION_DESC";
    /** Key: reference of a sequence annotation that is added to the node's sequence. */
    final private static String TP_SEQ_ANNOTATION_REF   = "SEQ_ANNOTATION_REF";
    /** Key: molecular sequence. */
    final private static String TP_SEQ_MOL_SEQ          = "SEQ_MOL_SEQ";
    /** Key: sequence name. */
    final private static String TP_SEQ_NAME             = "SEQ_NAME";
    /** Key: sequence symbol. */
    final private static String TP_SEQ_SYMBOL           = "SEQ_SYMBOL";
    /** Key: taxonomy common name. */
    final private static String TP_TAXONOMY_CN          = "TAXONOMY_CN";
    // From evoruby/lib/evo/apps/tseq_taxonomy_processor.rb:
    // NOTE(review): presumably this origin remark applies to all TP_* keys and only
    // ended up here because the constants are sorted alphabetically -- confirm.
    /** Key: taxonomy code. */
    final private static String TP_TAXONOMY_CODE        = "TAXONOMY_CODE";
    /** Key: taxonomy identifier value. */
    final private static String TP_TAXONOMY_ID          = "TAXONOMY_ID";
    /** Key: provider of the taxonomy identifier (optional companion of TP_TAXONOMY_ID). */
    final private static String TP_TAXONOMY_ID_PROVIDER = "TAXONOMY_ID_PROVIDER";
    /** Key: taxonomy scientific name. */
    final private static String TP_TAXONOMY_SN          = "TAXONOMY_SN";
    /** Key: a taxonomy synonym, appended to the taxonomy's synonym list. */
    final private static String TP_TAXONOMY_SYN         = "TAXONOMY_SYN";
67
    /**
     * Private constructor: every member of this class is static, so
     * instances are never created.
     */
    private PhylogenyDecorator() {
        // Not needed.
    }
71
72     public static void decorate( final Phylogeny phylogeny,
73                                  final Map<String, Map<String, String>> map,
74                                  final boolean picky,
75                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
76             throws IllegalArgumentException, PhyloXmlDataFormatException {
77         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
78             final PhylogenyNode node = iter.next();
79             final String name = node.getName();
80             if ( !ForesterUtil.isEmpty( name ) ) {
81                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
82                     Map<String, String> new_values = map.get( name );
83                     int x = 0;
84                     while ( ( new_values == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
85                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
86                         new_values = map.get( name.substring( 0, name.length() - x ) );
87                         ++x;
88                     }
89                     if ( new_values != null ) {
90                         if ( new_values.containsKey( TP_TAXONOMY_CODE ) ) {
91                             ForesterUtil.ensurePresenceOfTaxonomy( node );
92                             node.getNodeData().getTaxonomy().setTaxonomyCode( new_values.get( TP_TAXONOMY_CODE ) );
93                         }
94                         if ( new_values.containsKey( TP_TAXONOMY_ID )
95                                 && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
96                             ForesterUtil.ensurePresenceOfTaxonomy( node );
97                             node.getNodeData()
98                                     .getTaxonomy()
99                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
100                                                                     new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
101                         }
102                         else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
103                             ForesterUtil.ensurePresenceOfTaxonomy( node );
104                             node.getNodeData().getTaxonomy()
105                                     .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
106                         }
107                         if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
108                             ForesterUtil.ensurePresenceOfTaxonomy( node );
109                             node.getNodeData().getTaxonomy().setScientificName( new_values.get( TP_TAXONOMY_SN ) );
110                         }
111                         if ( new_values.containsKey( TP_TAXONOMY_CN ) ) {
112                             ForesterUtil.ensurePresenceOfTaxonomy( node );
113                             node.getNodeData().getTaxonomy().setCommonName( new_values.get( TP_TAXONOMY_CN ) );
114                         }
115                         if ( new_values.containsKey( TP_TAXONOMY_SYN ) ) {
116                             ForesterUtil.ensurePresenceOfTaxonomy( node );
117                             node.getNodeData().getTaxonomy().getSynonyms().add( new_values.get( TP_TAXONOMY_SYN ) );
118                         }
119                         if ( new_values.containsKey( TP_SEQ_ACCESSION )
120                                 && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
121                             ForesterUtil.ensurePresenceOfSequence( node );
122                             node.getNodeData()
123                                     .getSequence()
124                                     .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
125                                                                   new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
126                         }
127                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
128                             ForesterUtil.ensurePresenceOfSequence( node );
129                             final Annotation ann = new Annotation();
130                             ann.setDesc( new_values.get( TP_SEQ_ANNOTATION_DESC ) );
131                             node.getNodeData().getSequence().addAnnotation( ann );
132                         }
133                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_REF ) ) {
134                             ForesterUtil.ensurePresenceOfSequence( node );
135                             final Annotation ann = new Annotation( new_values.get( TP_SEQ_ANNOTATION_REF ) );
136                             node.getNodeData().getSequence().addAnnotation( ann );
137                         }
138                         if ( new_values.containsKey( TP_SEQ_SYMBOL ) ) {
139                             ForesterUtil.ensurePresenceOfSequence( node );
140                             node.getNodeData().getSequence().setSymbol( new_values.get( TP_SEQ_SYMBOL ) );
141                         }
142                         if ( new_values.containsKey( TP_SEQ_NAME ) ) {
143                             ForesterUtil.ensurePresenceOfSequence( node );
144                             node.getNodeData().getSequence().setName( new_values.get( TP_SEQ_NAME ) );
145                         }
146                         if ( new_values.containsKey( TP_SEQ_MOL_SEQ ) ) {
147                             ForesterUtil.ensurePresenceOfSequence( node );
148                             node.getNodeData().getSequence().setMolecularSequence( new_values.get( TP_SEQ_MOL_SEQ ) );
149                         }
150                         if ( new_values.containsKey( TP_NODE_NAME ) ) {
151                             node.setName( new_values.get( TP_NODE_NAME ) );
152                         }
153                     } // if ( new_values != null ) 
154                 } // if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) )
155                 else if ( picky ) {
156                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
157                 }
158             }
159         }
160     }
161
162     public static void decorate( final Phylogeny phylogeny,
163                                  final Map<String, String> map,
164                                  final FIELD field,
165                                  final boolean extract_bracketed_scientific_name,
166                                  final boolean extract_bracketed_tax_code,
167                                  final boolean picky,
168                                  final boolean cut_name_after_space,
169                                  final boolean process_name_intelligently,
170                                  final boolean process_similar_to,
171                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
172                                  final boolean trim_after_tilde,
173                                  final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
174             PhyloXmlDataFormatException {
175         PhylogenyDecorator.decorate( phylogeny,
176                                      map,
177                                      field,
178                                      extract_bracketed_scientific_name,
179                                      extract_bracketed_tax_code,
180                                      picky,
181                                      null,
182                                      cut_name_after_space,
183                                      process_name_intelligently,
184                                      process_similar_to,
185                                      numbers_of_chars_allowed_to_remove_if_not_found_in_map,
186                                      trim_after_tilde,
187                                      verbose );
188     }
189
190     /**
191      * 
192      * 
193      * 
194      * @param phylogeny
195      * @param map
196      *            maps names (in phylogeny) to new values if intermediate_map is
197      *            null otherwise maps intermediate value to new value
198      * @param field
199      * @param picky
200      * @param intermediate_map
201      *            maps name (in phylogeny) to a intermediate value
202      * @throws IllegalArgumentException
203      * @throws PhyloXmlDataFormatException 
204      */
205     public static void decorate( final Phylogeny phylogeny,
206                                  final Map<String, String> map,
207                                  final FIELD field,
208                                  final boolean extract_bracketed_scientific_name,
209                                  final boolean extract_bracketed_tax_code,
210                                  final boolean picky,
211                                  final Map<String, String> intermediate_map,
212                                  final boolean cut_name_after_space,
213                                  final boolean process_name_intelligently,
214                                  final boolean process_similar_to,
215                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
216                                  final boolean trim_after_tilde,
217                                  final boolean verbose ) throws IllegalArgumentException, PhyloXmlDataFormatException {
218         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
219             throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
220         }
221         if ( map.isEmpty() ) {
222             throw new IllegalArgumentException( "map is empty" );
223         }
224         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
225             final PhylogenyNode node = iter.next();
226             String name = node.getName();
227             String tilde_annotation = null;
228             if ( trim_after_tilde && ( name.indexOf( '~' ) > 0 ) ) {
229                 final int ti = name.indexOf( '~' );
230                 tilde_annotation = name.substring( ti );
231                 name = name.substring( 0, ti );
232             }
233             if ( !ForesterUtil.isEmpty( name ) ) {
234                 if ( intermediate_map != null ) {
235                     name = PhylogenyDecorator.extractIntermediate( intermediate_map, name, verbose );
236                 }
237                 if ( map.containsKey( name ) || ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 ) ) {
238                     String new_value = map.get( name );
239                     int x = 0;
240                     while ( ( new_value == null ) && ( numbers_of_chars_allowed_to_remove_if_not_found_in_map > 0 )
241                             && ( x <= numbers_of_chars_allowed_to_remove_if_not_found_in_map ) ) {
242                         new_value = map.get( name.substring( 0, name.length() - x ) );
243                         ++x;
244                     }
245                     if ( new_value != null ) {
246                         new_value = new_value.trim();
247                         new_value.replaceAll( "/\\s+/", " " );
248                         if ( extract_bracketed_scientific_name && new_value.endsWith( "]" ) ) {
249                             new_value = extractBracketedScientificNames( node, new_value );
250                         }
251                         else if ( extract_bracketed_tax_code ) {
252                             if ( ParserUtils.TAXOMONY_CODE_PATTERN_BRACKETED.matcher( new_value ).find() ) {
253                                 new_value = extractBracketedTaxCodes( node, new_value );
254                             }
255                             else if ( picky ) {
256                                 throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value
257                                         + "\"" );
258                             }
259                         }
260                         switch ( field ) {
261                             case MOL_SEQ:
262                                 if ( verbose ) {
263                                     System.out.println( name + ": " + new_value );
264                                 }
265                                 if ( !node.getNodeData().isHasSequence() ) {
266                                     node.getNodeData().setSequence( new Sequence() );
267                                 }
268                                 node.getNodeData().getSequence().setMolecularSequence( new_value );
269                                 break;
270                             case SEQUENCE_ANNOTATION_DESC:
271                                 if ( verbose ) {
272                                     System.out.println( name + ": " + new_value );
273                                 }
274                                 if ( !node.getNodeData().isHasSequence() ) {
275                                     node.getNodeData().setSequence( new Sequence() );
276                                 }
277                                 final Annotation annotation = new Annotation();
278                                 annotation.setDesc( new_value );
279                                 node.getNodeData().getSequence().addAnnotation( annotation );
280                                 break;
281                             case DOMAIN_STRUCTURE:
282                                 if ( verbose ) {
283                                     System.out.println( name + ": " + new_value );
284                                 }
285                                 if ( !node.getNodeData().isHasSequence() ) {
286                                     node.getNodeData().setSequence( new Sequence() );
287                                 }
288                                 node.getNodeData().getSequence()
289                                         .setDomainArchitecture( new DomainArchitecture( new_value ) );
290                                 break;
291                             case TAXONOMY_CODE:
292                                 if ( verbose ) {
293                                     System.out.println( name + ": " + new_value );
294                                 }
295                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
296                                 node.getNodeData().getTaxonomy().setTaxonomyCode( new_value );
297                                 break;
298                             case TAXONOMY_SCIENTIFIC_NAME:
299                                 if ( verbose ) {
300                                     System.out.println( name + ": " + new_value );
301                                 }
302                                 ForesterUtil.ensurePresenceOfTaxonomy( node );
303                                 node.getNodeData().getTaxonomy().setScientificName( new_value );
304                                 break;
305                             case SEQUENCE_NAME:
306                                 if ( trim_after_tilde ) {
307                                     new_value = addTildeAnnotation( tilde_annotation, new_value );
308                                 }
309                                 if ( verbose ) {
310                                     System.out.println( name + ": " + new_value );
311                                 }
312                                 if ( !node.getNodeData().isHasSequence() ) {
313                                     node.getNodeData().setSequence( new Sequence() );
314                                 }
315                                 node.getNodeData().getSequence().setName( new_value );
316                                 break;
317                             case NODE_NAME:
318                                 if ( verbose ) {
319                                     System.out.print( name + " -> " );
320                                 }
321                                 if ( cut_name_after_space ) {
322                                     if ( verbose ) {
323                                         System.out.print( new_value + " -> " );
324                                     }
325                                     new_value = PhylogenyDecorator.deleteAtFirstSpace( new_value );
326                                 }
327                                 else if ( process_name_intelligently ) {
328                                     if ( verbose ) {
329                                         System.out.print( new_value + " -> " );
330                                     }
331                                     new_value = PhylogenyDecorator.processNameIntelligently( new_value );
332                                 }
333                                 else if ( process_similar_to ) {
334                                     if ( verbose ) {
335                                         System.out.print( new_value + " -> " );
336                                     }
337                                     new_value = PhylogenyDecorator.processSimilarTo( new_value );
338                                 }
339                                 if ( PhylogenyDecorator.SANITIZE ) {
340                                     new_value = PhylogenyDecorator.sanitize( new_value );
341                                 }
342                                 if ( trim_after_tilde ) {
343                                     new_value = addTildeAnnotation( tilde_annotation, new_value );
344                                 }
345                                 if ( verbose ) {
346                                     System.out.println( new_value );
347                                 }
348                                 node.setName( new_value );
349                                 break;
350                             default:
351                                 throw new RuntimeException( "unknown field \"" + field + "\"" );
352                         }
353                     }
354                 }
355                 else if ( picky ) {
356                     throw new IllegalArgumentException( "\"" + name + "\" not found in name map" );
357                 }
358             }
359         }
360     }
361
362     public static void decorate( final Phylogeny[] phylogenies,
363                                  final Map<String, Map<String, String>> map,
364                                  final boolean picky,
365                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map )
366             throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
367         for( final Phylogeny phylogenie : phylogenies ) {
368             PhylogenyDecorator
369                     .decorate( phylogenie, map, picky, numbers_of_chars_allowed_to_remove_if_not_found_in_map );
370         }
371     }
372
373     public static void decorate( final Phylogeny[] phylogenies,
374                                  final Map<String, String> map,
375                                  final FIELD field,
376                                  final boolean extract_bracketed_scientific_name,
377                                  final boolean extract_bracketed_tax_code,
378                                  final boolean picky,
379                                  final boolean cut_name_after_space,
380                                  final boolean process_name_intelligently,
381                                  final boolean process_similar_to,
382                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
383                                  final boolean trim_after_tilde,
384                                  final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
385             PhyloXmlDataFormatException {
386         for( final Phylogeny phylogenie : phylogenies ) {
387             PhylogenyDecorator.decorate( phylogenie,
388                                          map,
389                                          field,
390                                          extract_bracketed_scientific_name,
391                                          extract_bracketed_tax_code,
392                                          picky,
393                                          cut_name_after_space,
394                                          process_name_intelligently,
395                                          process_similar_to,
396                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
397                                          trim_after_tilde,
398                                          verbose );
399         }
400     }
401
402     public static void decorate( final Phylogeny[] phylogenies,
403                                  final Map<String, String> map,
404                                  final FIELD field,
405                                  final boolean extract_bracketed_scientific_name,
406                                  final boolean extract_bracketed_tax_code,
407                                  final boolean picky,
408                                  final Map<String, String> intermediate_map,
409                                  final boolean cut_name_after_space,
410                                  final boolean process_name_intelligently,
411                                  final boolean process_similar_to,
412                                  final int numbers_of_chars_allowed_to_remove_if_not_found_in_map,
413                                  final boolean trim_after_tilde,
414                                  final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
415             PhyloXmlDataFormatException {
416         for( final Phylogeny phylogenie : phylogenies ) {
417             PhylogenyDecorator.decorate( phylogenie,
418                                          map,
419                                          field,
420                                          extract_bracketed_scientific_name,
421                                          extract_bracketed_tax_code,
422                                          picky,
423                                          intermediate_map,
424                                          cut_name_after_space,
425                                          process_name_intelligently,
426                                          process_similar_to,
427                                          numbers_of_chars_allowed_to_remove_if_not_found_in_map,
428                                          trim_after_tilde,
429                                          verbose );
430         }
431     }
432
433     public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
434             throws IOException {
435         final Map<String, Map<String, String>> map = new HashMap<String, Map<String, String>>();
436         BasicTable<String> mapping_table = null;
437         mapping_table = BasicTableParser.parse( mapping_table_file, '\t', false, false );
438         for( int row = 0; row < mapping_table.getNumberOfRows(); ++row ) {
439             final Map<String, String> row_map = new HashMap<String, String>();
440             String name = null;
441             for( int col = 0; col < mapping_table.getNumberOfColumns(); ++col ) {
442                 final String table_cell = mapping_table.getValue( col, row );
443                 if ( col == 0 ) {
444                     name = table_cell;
445                 }
446                 else if ( table_cell != null ) {
447                     final String key = table_cell.substring( 0, table_cell.indexOf( ':' ) );
448                     final String val = table_cell.substring( table_cell.indexOf( ':' ) + 1, table_cell.length() );
449                     row_map.put( key, val );
450                 }
451             }
452             map.put( name, row_map );
453         }
454         return map;
455     }
456
457     private final static String addTildeAnnotation( final String tilde_annotation, final String new_value ) {
458         if ( ForesterUtil.isEmpty( tilde_annotation ) ) {
459             return new_value;
460         }
461         return new_value + tilde_annotation;
462     }
463
464     private static String deleteAtFirstSpace( final String name ) {
465         final int first_space = name.indexOf( " " );
466         if ( first_space > 1 ) {
467             return name.substring( 0, first_space ).trim();
468         }
469         return name;
470     }
471
472     private static String extractBracketedScientificNames( final PhylogenyNode node, final String new_value ) {
473         final int i = new_value.lastIndexOf( "[" );
474         final String scientific_name = new_value.substring( i + 1, new_value.length() - 1 );
475         ForesterUtil.ensurePresenceOfTaxonomy( node );
476         node.getNodeData().getTaxonomy().setScientificName( scientific_name );
477         return new_value.substring( 0, i - 1 ).trim();
478     }
479
480     private static String extractBracketedTaxCodes( final PhylogenyNode node, final String new_value ) {
481         final StringBuilder sb = new StringBuilder();
482         sb.append( new_value );
483         final String tc = extractBracketedTaxCodes( sb );
484         if ( !ForesterUtil.isEmpty( tc ) ) {
485             ForesterUtil.ensurePresenceOfTaxonomy( node );
486             try {
487                 node.getNodeData().getTaxonomy().setTaxonomyCode( tc );
488             }
489             catch ( final PhyloXmlDataFormatException e ) {
490                 throw new IllegalArgumentException( "illegal format for taxonomy code: " + tc );
491             }
492             return sb.toString().trim();
493         }
494         return new_value;
495     }
496
497     private static String extractBracketedTaxCodes( final StringBuilder sb ) {
498         final Matcher m = ParserUtils.TAXOMONY_CODE_PATTERN_BRACKETED.matcher( sb );
499         if ( m.find() ) {
500             final String tc = m.group( 1 );
501             sb.delete( m.start( 1 ) - 1, m.end( 1 ) + 1 );
502             return tc;
503         }
504         return null;
505     }
506
507     private static String extractIntermediate( final Map<String, String> intermediate_map,
508                                                final String name,
509                                                final boolean verbose ) {
510         String new_name = null;
511         if ( verbose ) {
512             System.out.print( name + " => " );
513         }
514         if ( intermediate_map.containsKey( name ) ) {
515             new_name = intermediate_map.get( name );
516             if ( ForesterUtil.isEmpty( new_name ) ) {
517                 throw new IllegalArgumentException( "\"" + name + "\" maps to null or empty string in secondary map" );
518             }
519         }
520         else {
521             throw new IllegalArgumentException( "\"" + name + "\" not found in name secondary map" );
522         }
523         if ( verbose ) {
524             System.out.println( new_name + "  " );
525         }
526         return new_name;
527     }
528
529     private static String processNameIntelligently( final String name ) {
530         final String[] s = name.split( " " );
531         if ( s.length < 2 ) {
532             return name;
533         }
534         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "|" ) > 0 ) ) {
535             return s[ 0 ];
536         }
537         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "|" ) > 0 ) ) {
538             return s[ 1 ];
539         }
540         else if ( ( s[ 0 ].indexOf( "_" ) > 0 ) && ( s[ 0 ].indexOf( "." ) > 0 ) ) {
541             return s[ 0 ];
542         }
543         else if ( ( s[ 1 ].indexOf( "_" ) > 0 ) && ( s[ 1 ].indexOf( "." ) > 0 ) ) {
544             return s[ 1 ];
545         }
546         else if ( s[ 0 ].indexOf( "_" ) > 0 ) {
547             return s[ 0 ];
548         }
549         else if ( s[ 1 ].indexOf( "_" ) > 0 ) {
550             return s[ 1 ];
551         }
552         else {
553             return s[ 0 ];
554         }
555     }
556
557     private static String processSimilarTo( final String name ) {
558         final int i = name.toLowerCase().indexOf( "similar to" );
559         String similar_to = "";
560         if ( i >= 0 ) {
561             similar_to = " similarity=" + name.substring( i + 10 ).trim();
562         }
563         final String pi = processNameIntelligently( name );
564         return pi + similar_to;
565     }
566
567     private static String sanitize( String s ) {
568         s = s.replace( ' ', '_' );
569         s = s.replace( '(', '{' );
570         s = s.replace( ')', '}' );
571         s = s.replace( '[', '{' );
572         s = s.replace( ']', '}' );
573         s = s.replace( ',', '_' );
574         return s;
575     }
576
    /**
     * The kind of node data a new value is written to by the single-field
     * decorate methods of this class.
     */
    public static enum FIELD {
        // New value becomes the domain architecture of the node's sequence.
        DOMAIN_STRUCTURE,
        // New value becomes the molecular sequence of the node's sequence.
        MOL_SEQ,
        // New value (after optional name processing) replaces the node name.
        NODE_NAME,
        // New value is added to the node's sequence as annotation description.
        SEQUENCE_ANNOTATION_DESC,
        // New value becomes the name of the node's sequence.
        SEQUENCE_NAME,
        // New value becomes the taxonomy code of the node's taxonomy.
        TAXONOMY_CODE,
        // New value becomes the scientific name of the node's taxonomy.
        TAXONOMY_SCIENTIFIC_NAME;
    }
586 }