From: cmzmasek@gmail.com Date: Wed, 16 Apr 2014 00:14:25 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=c11ce93b4619a0bbfcad671407e4bc98ff6e7a41;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java index 2921230..89b826a 100644 --- a/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java +++ b/forester/java/src/org/forester/io/parsers/nhx/NHXParser.java @@ -403,12 +403,10 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } } // \n\t is always ignored, - // as is " (34) and ' (39) (space is 32): - if ( ( isIgnoreQuotes() && ( ( c < 33 ) || ( c > 126 ) || ( c == 34 ) || ( c == 39 ) || ( ( _clade_level == 0 ) && ( c == ';' ) ) ) ) - || ( !isIgnoreQuotes() && ( ( c < 32 ) || ( c > 126 ) || ( ( _clade_level == 0 ) && ( c == ';' ) ) ) ) ) { - //do nothing - } - else if ( ( c == 32 ) && ( !_in_single_quote && !_in_double_quote ) ) { + // "=34 '=39 space=32 + if ( ( c < 32 ) || ( c > 126 ) || ( isIgnoreQuotes() && ( ( c == 32 ) || ( c == 34 ) || ( c == 39 ) ) ) + || ( ( c == 32 ) && ( !_in_single_quote && !_in_double_quote ) ) + || ( ( _clade_level == 0 ) && ( c == ';' ) && ( !_in_single_quote && !_in_double_quote ) ) ) { //do nothing } else if ( _in_comment ) { @@ -421,10 +419,10 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse _in_double_quote = false; } else { - _current_anotation.append( c != ':' ? c : BELL ); + _current_anotation.append( changeCharInParens( c ) ); } } - else if ( c == '"' ) { + else if ( ( c == '"' ) && !_in_single_quote ) { _in_double_quote = true; } else if ( _in_single_quote ) { @@ -432,7 +430,7 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse _in_single_quote = false; } else { - _current_anotation.append( c != ':' ? c : BELL ); + _current_anotation.append( changeCharInParens( c ) ); } } else if ( c == 39 ) { @@ -496,6 +494,19 @@ public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParse } } + private final static char changeCharInParens( char c ) { + if ( c == ':' ) { + c = BELL; + } + else if ( c == '[' ) { + c = '{'; + } + else if ( c == ']' ) { + c = '}'; + } + return c; + } + private final void processCloseParen() throws PhylogenyParserException, NHXFormatException, PhyloXmlDataFormatException { if ( _clade_level < 0 ) { diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index d904a81..4c650a1 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -62,8 +62,6 @@ public final class ParserUtils { final public static Pattern TAXOMONY_CODE_PATTERN_BRACKETED = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" ); final public static Pattern TAXOMONY_CODE_PATTERN_PFR = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(" + TAX_CODE + ")\\b" ); - // final public static Pattern TAXOMONY_SN_PATTERN = Pattern - // .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]{2,30}_[a-z]{3,30}(?:_[a-z][a-z0-9_]+)?)\\b" ); final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern.compile( "(?:\\b|_)(" + SN_BN + ")(?:(\\s*$)|([_ ][a-z]*[A-Z0-9]))" ); final public static Pattern TAXOMONY_SN_PATTERN_SNS = Pattern.compile( "(?:\\b|_)(" + SN_BN @@ -218,10 +216,6 @@ public final class ParserUtils { } public final static String extractScientificNameFromNodeName( final String name ) { - // final Matcher m = TAXOMONY_SN_PATTERN.matcher( name ); - // if ( m.find() ) { - // return m.group( 1 ).replace( '_', ' ' ); - // } final Matcher m_ss = TAXOMONY_SN_PATTERN_STRAIN_SUBSTRAIN.matcher( name ); if ( m_ss.find() ) { String s = m_ss.group( 1 ).replace( '_', ' ' ); @@ -275,7 +269,11 @@ public final class ParserUtils { } final Matcher m_sp = TAXOMONY_SN_PATTERN_SP.matcher( name ); if ( m_sp.find() ) { - return m_sp.group( 1 ).replace( '_', ' ' ); + String s = m_sp.group( 1 ).replace( '_', ' ' ); + if ( s.endsWith( " sp" ) ) { + s = s + "."; + } + return s; } return null; } diff --git a/forester/java/src/org/forester/io/writers/PhylogenyWriter.java b/forester/java/src/org/forester/io/writers/PhylogenyWriter.java index 6d490c5..6aee097 100644 --- a/forester/java/src/org/forester/io/writers/PhylogenyWriter.java +++ b/forester/java/src/org/forester/io/writers/PhylogenyWriter.java @@ -688,19 +688,19 @@ public final class PhylogenyWriter { else if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getCommonName() ) ) { data = node.getNodeData().getTaxonomy().getCommonName(); } - else if ( node.getNodeData().getTaxonomy().getTaxonomyCode() != null ) { - data = node.getNodeData().getTaxonomy().getTaxonomyCode(); - } } else if ( node.getNodeData().isHasSequence() ) { if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getName() ) ) { data = node.getNodeData().getSequence().getName(); } + else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getSymbol() ) ) { + data = node.getNodeData().getSequence().getSymbol(); + } + else if ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getGeneName() ) ) { + data = node.getNodeData().getSequence().getGeneName(); + } } - if ( data.length() > 0 ) { - data = data.replaceAll( " ", "_" ); - } - writer.write( data ); + writer.write( ForesterUtil.santitizeStringForNH( data ).toString() ); } writer.write( ";" ); writer.write( ForesterUtil.LINE_SEPARATOR ); diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java index 6a4876b..23eeec1 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyNode.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyNode.java @@ -889,7 +889,6 @@ public final class PhylogenyNode implements Comparable { // --------------------------------------------------------- final public String toNewHampshire( final boolean write_distance_to_parent, final NH_CONVERSION_SUPPORT_VALUE_STYLE svs ) { - final StringBuilder sb = new StringBuilder(); String data = ""; if ( ( svs == NH_CONVERSION_SUPPORT_VALUE_STYLE.AS_INTERNAL_NODE_NAMES ) && !isExternal() ) { if ( getBranchData().isHasConfidences() @@ -912,27 +911,19 @@ public final class PhylogenyNode implements Comparable { else if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getCommonName() ) ) { data = getNodeData().getTaxonomy().getCommonName(); } - else if ( getNodeData().getTaxonomy().getTaxonomyCode() != null ) { - data = getNodeData().getTaxonomy().getTaxonomyCode(); - } } else if ( getNodeData().isHasSequence() ) { if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getName() ) ) { data = getNodeData().getSequence().getName(); } - } - data = data.trim(); - if ( data.length() > 0 ) { - data = data.replaceAll( "'", "_" ); - if ( ForesterUtil.isContainsParanthesesableNhCharacter( data ) ) { - sb.append( '\'' ); - sb.append( data ); - sb.append( '\'' ); + else if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getSymbol() ) ) { + data = getNodeData().getSequence().getSymbol(); } - else { - sb.append( data ); + else if ( !ForesterUtil.isEmpty( getNodeData().getSequence().getGeneName() ) ) { + data = getNodeData().getSequence().getGeneName(); } } + final StringBuilder sb = ForesterUtil.santitizeStringForNH( data ); if ( write_distance_to_parent && ( getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) ) { sb.append( ":" ); sb.append( getDistanceToParent() ); @@ -954,19 +945,10 @@ public final class PhylogenyNode implements Comparable { * representation. */ final public String toNewHampshireX() { - final StringBuffer sb = new StringBuffer(); + final StringBuilder sb = new StringBuilder(); final StringBuffer s_nhx = new StringBuffer(); if ( !ForesterUtil.isEmpty( getName() ) ) { - //final String name = ForesterUtil.replaceIllegalNhCharacters( getName() ); - final String name = getName().trim(); - if ( ForesterUtil.isContainsParanthesesableNhCharacter( name ) ) { - sb.append( '\'' ); - sb.append( name ); - sb.append( '\'' ); - } - else { - sb.append( name ); - } + sb.append( ForesterUtil.santitizeStringForNH( getName() ) ); } if ( getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) { sb.append( ":" ); diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index ec44a86..8a937e9 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -384,7 +384,6 @@ public final class Test { System.out.println( "failed." ); failed++; } - System.exit( 0 ); System.out.print( "Nexus characters parsing: " ); if ( Test.testNexusCharactersParsing() ) { System.out.println( "OK." ); @@ -4388,7 +4387,7 @@ public final class Test { .equals( "Macrocera sp." ) ) { return false; } - if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp" ) ) { + if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp." ) ) { return false; } } @@ -8120,7 +8119,6 @@ public final class Test { if ( !p54.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ).equals( "((A,B)[88],C);" ) ) { return false; } - // final Phylogeny p55 = factory .create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ), new NHXParser() )[ 0 ]; @@ -8148,6 +8146,31 @@ public final class Test { System.out.println( p56.toNewHampshire() ); return false; } + final String s58 = "('Homo \"man\" sapiens:1',\"Homo 'man' sapiens;\")';root \"1_ )';"; + final Phylogeny p58 = factory.create( new StringBuffer( s58 ), new NHXParser() )[ 0 ]; + if ( !p58.toNewHampshire().equals( s58 ) ) { + System.out.println( p58.toNewHampshire() ); + return false; + } + final String s59 = "('Homo \"man sapiens:1',\"Homo 'man sapiens\")\"root; '1_ )\";"; + final Phylogeny p59 = factory.create( new StringBuffer( s59 ), new NHXParser() )[ 0 ]; + if ( !p59.toNewHampshire().equals( s59 ) ) { + System.out.println( p59.toNewHampshire() ); + return false; + } + final String s60 = "('\" ;,:\":\"',\"'abc def' g's_\",'=:0.45+,.:%~`!@#$%^&*()_-+={} | ;,');"; + final Phylogeny p60 = factory.create( new StringBuffer( s60 ), new NHXParser() )[ 0 ]; + if ( !p60.toNewHampshire().equals( s60 ) ) { + System.out.println( p60.toNewHampshire() ); + return false; + } + final String s61 = "('H[omo] \"man\" sapiens:1',\"H[omo] 'man' sapiens;\",H[omo] sapiens)';root \"1_ )';"; + final Phylogeny p61 = factory.create( new StringBuffer( s61 ), new NHXParser() )[ 0 ]; + if ( !p61.toNewHampshire() + .equals( "('H{omo} \"man\" sapiens:1',\"H{omo} 'man' sapiens;\",Hsapiens)';root \"1_ )';" ) ) { + System.out.println( p61.toNewHampshire() ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -8636,6 +8659,14 @@ public final class Test { System.out.println( n6.toNewHampshireX() ); return false; } + final PhylogenyNode n7 = new PhylogenyNode(); + n7.setName( " gks:dr-m4 \" ' `@:[]sadq04 " ); + if ( !n7.toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) + .equals( "'gks:dr-m4 \" ` `@:[]sadq04'" ) ) { + System.out.println( n7 + .toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -9072,6 +9103,12 @@ public final class Test { if ( !p10.toNewHampshireX().equals( "((A:0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) { return false; } + final Phylogeny p11 = factory + .create( " [79] ( ('A: \" ' [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]", + new NHXParser() )[ 0 ]; + if ( !p11.toNewHampshireX().equals( "(('A: \"':0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -9162,13 +9199,13 @@ public final class Test { if ( phy.getNodes( "'single quotes' inside double quotes" ).size() != 1 ) { return false; } - if ( phy.getNodes( "double quotes inside single quotes" ).size() != 1 ) { + if ( phy.getNodes( "\"double quotes\" inside single quotes" ).size() != 1 ) { return false; } if ( phy.getNodes( "noquotes" ).size() != 1 ) { return false; } - if ( phy.getNodes( "A ( B C '" ).size() != 1 ) { + if ( phy.getNodes( "A ( B C '" ).size() != 1 ) { return false; } final NHXParser p1p = new NHXParser(); @@ -9198,7 +9235,7 @@ public final class Test { final Phylogeny p10 = factory .create( " [79] ( (\"A \n\tB \" [co mment] :0 .2[comment],'B':0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],'C (or D?\\//;,))': 0.1)[comment]'\nroot is here (cool, was! ) ':0.1[100] [comment]", new NHXParser() )[ 0 ]; - final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; + final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; if ( !p10.toNewHampshireX().equals( p10_clean_str ) ) { return false; } @@ -9206,11 +9243,10 @@ public final class Test { if ( !p11.toNewHampshireX().equals( p10_clean_str ) ) { return false; } - // final Phylogeny p12 = factory .create( " [79] ( (\"A \n\tB \" [[][] :0 .2[comment][\t&\t&\n N\tH\tX:S=mo\tnkey !],'\tB\t\b\t\n\f\rB B ':0.0\b3[])\t[com ment]: 0. 5 \t[ 9 1 ][ \ncomment],'C\t (or D?\\//;,))': 0.\b1)[comment]'\nroot \tis here (cool, \b\t\n\f\r was! ) ':0.1[100] [comment]", new NHXParser() )[ 0 ]; - final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; + final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]"; if ( !p12.toNewHampshireX().equals( p12_clean_str ) ) { return false; } @@ -9218,7 +9254,7 @@ public final class Test { if ( !p13.toNewHampshireX().equals( p12_clean_str ) ) { return false; } - final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;"; + final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;"; if ( !p13.toNewHampshire().equals( p12_clean_str_nh ) ) { return false; } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 300de97..899e775 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -74,24 +74,24 @@ import org.forester.surfacing.SurfacingUtil; public final class ForesterUtil { public final static String FILE_SEPARATOR = System.getProperty( "file.separator" ); - public final static String LINE_SEPARATOR = System.getProperty( "line.separator" ); + public static final NumberFormat FORMATTER_06; + public static final NumberFormat FORMATTER_3; + public static final NumberFormat FORMATTER_6; + public static final NumberFormat FORMATTER_9; public final static String JAVA_VENDOR = System.getProperty( "java.vendor" ); public final static String JAVA_VERSION = System.getProperty( "java.version" ); + public final static String LINE_SEPARATOR = System.getProperty( "line.separator" ); + public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; + public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; + public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; + public static final BigDecimal NULL_BD = new BigDecimal( 0 ); public final static String OS_ARCH = System.getProperty( "os.arch" ); public final static String OS_NAME = System.getProperty( "os.name" ); public final static String OS_VERSION = System.getProperty( "os.version" ); - public final static Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]'\"]" ); - public final static double ZERO_DIFF = 1.0E-9; - public static final BigDecimal NULL_BD = new BigDecimal( 0 ); - public static final NumberFormat FORMATTER_9; - public static final NumberFormat FORMATTER_6; - public static final NumberFormat FORMATTER_06; - public static final NumberFormat FORMATTER_3; - public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; - public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; - public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; - public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; public static final String PDB = "http://www.pdb.org/pdb/explore/explore.do?pdbId="; + public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; + public final static double ZERO_DIFF = 1.0E-9; + private static final Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]]" ); static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); dfs.setDecimalSeparator( '.' ); @@ -105,16 +105,6 @@ public final class ForesterUtil { private ForesterUtil() { } - public static int calculateOverlap( final Domain domain, final List covered_positions ) { - int overlap_count = 0; - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { - ++overlap_count; - } - } - return overlap_count; - } - final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { if ( sb.length() > 0 ) { sb.append( separator ); @@ -122,72 +112,6 @@ public final class ForesterUtil { } /** - * - * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => - * domain with 0.3 is ignored - * - * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored - * - * - * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ - * ignored - * - * @param max_allowed_overlap - * maximal allowed overlap (inclusive) to be still considered not - * overlapping (zero or negative value to allow any overlap) - * @param remove_engulfed_domains - * to remove domains which are completely engulfed by coverage of - * domains with better support - * @param protein - * @return - */ - public static Protein removeOverlappingDomains( final int max_allowed_overlap, - final boolean remove_engulfed_domains, - final Protein protein ) { - final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies() - .getSpeciesId(), protein.getLength() ); - final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); - final List covered_positions = new ArrayList(); - for( final Domain domain : sorted ) { - if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) - && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { - final int covered_positions_size = covered_positions.size(); - for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { - covered_positions.add( false ); - } - final int new_covered_positions_size = covered_positions.size(); - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( i < new_covered_positions_size ) { - covered_positions.set( i, true ); - } - else { - covered_positions.add( true ); - } - } - pruned_protein.addProteinDomain( domain ); - } - } - return pruned_protein; - } - - /** - * Returns true is Domain domain falls in an uninterrupted stretch of - * covered positions. - * - * @param domain - * @param covered_positions - * @return - */ - public static boolean isEngulfed( final Domain domain, final List covered_positions ) { - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { - return false; - } - } - return true; - } - - /** * This calculates a color. If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, * otherwise a color 'proportional' to value is returned. @@ -277,6 +201,16 @@ public final class ForesterUtil { } } + public static int calculateOverlap( final Domain domain, final List covered_positions ) { + int overlap_count = 0; + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { + ++overlap_count; + } + } + return overlap_count; + } + final public static String collapseWhiteSpace( final String s ) { return s.replaceAll( "[\\s]+", " " ); } @@ -425,16 +359,6 @@ public final class ForesterUtil { } } - public static String[] file2array( final File file ) throws IOException { - final List list = file2list( file ); - final String[] ary = new String[ list.size() ]; - int i = 0; - for( final String s : list ) { - ary[ i++ ] = s; - } - return ary; - } - public static String[][] file22dArray( final File file ) throws IOException { final List list = new ArrayList(); final BufferedReader in = new BufferedReader( new FileReader( file ) ); @@ -463,6 +387,16 @@ public final class ForesterUtil { return ary; } + public static String[] file2array( final File file ) throws IOException { + final List list = file2list( file ); + final String[] ary = new String[ list.size() ]; + int i = 0; + for( final String s : list ) { + ary[ i++ ] = s; + } + return ary; + } + final public static List file2list( final File file ) throws IOException { final List list = new ArrayList(); final BufferedReader in = new BufferedReader( new FileReader( file ) ); @@ -567,10 +501,6 @@ public final class ForesterUtil { } } - final public static boolean isContainsParanthesesableNhCharacter( final String nh ) { - return PARANTHESESABLE_NH_CHARS_PATTERN.matcher( nh ).find(); - } - final public static boolean isEmpty( final List l ) { if ( ( l == null ) || l.isEmpty() ) { return true; @@ -599,6 +529,23 @@ public final class ForesterUtil { return ( ( s == null ) || ( s.length() < 1 ) ); } + /** + * Returns true is Domain domain falls in an uninterrupted stretch of + * covered positions. + * + * @param domain + * @param covered_positions + * @return + */ + public static boolean isEngulfed( final Domain domain, final List covered_positions ) { + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { + return false; + } + } + return true; + } + final public static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < ZERO_DIFF ); } @@ -643,6 +590,16 @@ public final class ForesterUtil { } } + public final static boolean isMac() { + try { + return OS_NAME.toLowerCase().startsWith( "mac" ); + } + catch ( final Exception e ) { + ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e ); + return false; + } + } + final public static boolean isNull( final BigDecimal s ) { return ( ( s == null ) || ( s.compareTo( NULL_BD ) == 0 ) ); } @@ -680,16 +637,6 @@ public final class ForesterUtil { } } - public final static boolean isMac() { - try { - return OS_NAME.toLowerCase().startsWith( "mac" ); - } - catch ( final Exception e ) { - ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e ); - return false; - } - } - final public static String isWritableFile( final File f ) { if ( f.isDirectory() ) { return "[" + f + "] is a directory"; @@ -787,26 +734,184 @@ public final class ForesterUtil { } } - final public static BufferedReader obtainReader( final Object source ) throws IOException, FileNotFoundException { - BufferedReader reader = null; - if ( source instanceof File ) { - final File f = ( File ) source; - if ( !f.exists() ) { - throw new IOException( "\"" + f.getAbsolutePath() + "\" does not exist" ); + public final static Color obtainColorDependingOnTaxonomyGroup( final String tax_group ) { + if ( !ForesterUtil.isEmpty( tax_group ) ) { + if ( tax_group.equals( TaxonomyGroups.DEUTEROSTOMIA ) ) { + return TaxonomyColors.DEUTEROSTOMIA_COLOR; } - else if ( !f.isFile() ) { - throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a file" ); + else if ( tax_group.equals( TaxonomyGroups.PROTOSTOMIA ) ) { + return TaxonomyColors.PROTOSTOMIA_COLOR; } - else if ( !f.canRead() ) { - throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a readable" ); + else if ( tax_group.equals( TaxonomyGroups.CNIDARIA ) ) { + return TaxonomyColors.CNIDARIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.PLACOZOA ) ) { + return TaxonomyColors.PLACOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CTENOPHORA ) ) { + return TaxonomyColors.CTENOPHORA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.PORIFERA ) ) { + return TaxonomyColors.PORIFERA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { + return TaxonomyColors.CHOANOFLAGELLIDA; + } + else if ( tax_group.equals( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) ) { + return TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA; + } + else if ( tax_group.equals( TaxonomyGroups.DIKARYA ) ) { + return TaxonomyColors.DIKARYA_COLOR; + } + else if ( tax_group.equalsIgnoreCase( TaxonomyGroups.FUNGI ) + || tax_group.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { + return TaxonomyColors.OTHER_FUNGI_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP ) ) { + return TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.AMOEBOZOA ) ) { + return TaxonomyColors.AMOEBOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.EMBRYOPHYTA ) ) { + return TaxonomyColors.EMBRYOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CHLOROPHYTA ) ) { + return TaxonomyColors.CHLOROPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.RHODOPHYTA ) ) { + return TaxonomyColors.RHODOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.HACROBIA ) ) { + return TaxonomyColors.HACROBIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) ) { + return TaxonomyColors.GLAUCOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.STRAMENOPILES ) ) { + return TaxonomyColors.STRAMENOPILES_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.ALVEOLATA ) ) { + return TaxonomyColors.ALVEOLATA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.RHIZARIA ) ) { + return TaxonomyColors.RHIZARIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.EXCAVATA ) ) { + return TaxonomyColors.EXCAVATA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.APUSOZOA ) ) { + return TaxonomyColors.APUSOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.ARCHAEA ) ) { + return TaxonomyColors.ARCHAEA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.BACTERIA ) ) { + return TaxonomyColors.BACTERIA_COLOR; } - reader = new BufferedReader( new FileReader( f ) ); - } - else if ( source instanceof InputStream ) { - reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) ); - } - else if ( source instanceof String ) { - reader = new BufferedReader( new StringReader( ( String ) source ) ); + } + return null; + } + + public final static String obtainNormalizedTaxonomyGroup( final String tax ) { + if ( tax.equalsIgnoreCase( TaxonomyGroups.DEUTEROSTOMIA ) ) { + return TaxonomyGroups.DEUTEROSTOMIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.PROTOSTOMIA ) ) { + return TaxonomyGroups.PROTOSTOMIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.CNIDARIA ) ) { + return TaxonomyGroups.CNIDARIA; + } + else if ( tax.toLowerCase().startsWith( "trichoplax" ) || tax.equalsIgnoreCase( TaxonomyGroups.PLACOZOA ) ) { + return TaxonomyGroups.PLACOZOA; + } + else if ( tax.toLowerCase().startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( TaxonomyGroups.CTENOPHORA ) ) { + return TaxonomyGroups.CTENOPHORA; + } + else if ( tax.toLowerCase().startsWith( "amphimedon" ) || tax.equalsIgnoreCase( TaxonomyGroups.PORIFERA ) ) { + return TaxonomyGroups.PORIFERA; + } + else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { + return TaxonomyGroups.CHOANOFLAGELLIDA; + } + else if ( tax.toLowerCase().startsWith( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) + || tax.toLowerCase().startsWith( "ichthyophonida and filasterea" ) + || tax.toLowerCase().startsWith( "ichthyosporea & filasterea" ) + || tax.toLowerCase().startsWith( "ichthyosporea and filasterea" ) ) { + return TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.DIKARYA ) ) { + return TaxonomyGroups.DIKARYA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { + return TaxonomyGroups.OTHER_FUNGI; + } + else if ( tax.toLowerCase().startsWith( "nucleariidae and fonticula" ) ) { + return TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.AMOEBOZOA ) ) { + return TaxonomyGroups.AMOEBOZOA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.EMBRYOPHYTA ) ) { + return TaxonomyGroups.EMBRYOPHYTA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.CHLOROPHYTA ) ) { + return TaxonomyGroups.CHLOROPHYTA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHODOPHYTA ) ) { + return TaxonomyGroups.RHODOPHYTA; + } + else if ( tax.toLowerCase().startsWith( TaxonomyGroups.HACROBIA ) ) { + return TaxonomyGroups.HACROBIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { + return TaxonomyGroups.GLAUCOCYSTOPHYCEAE; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.STRAMENOPILES ) ) { + return TaxonomyGroups.STRAMENOPILES; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALVEOLATA ) ) { + return TaxonomyGroups.ALVEOLATA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHIZARIA ) ) { + return TaxonomyGroups.RHIZARIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.EXCAVATA ) ) { + return TaxonomyGroups.EXCAVATA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.APUSOZOA ) ) { + return TaxonomyGroups.APUSOZOA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ARCHAEA ) ) { + return TaxonomyGroups.ARCHAEA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) { + return TaxonomyGroups.BACTERIA; + } + return null; + } + + final public static BufferedReader obtainReader( final Object source ) throws IOException, FileNotFoundException { + BufferedReader reader = null; + if ( source instanceof File ) { + final File f = ( File ) source; + if ( !f.exists() ) { + throw new IOException( "\"" + f.getAbsolutePath() + "\" does not exist" ); + } + else if ( !f.isFile() ) { + throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a file" ); + } + else if ( !f.canRead() ) { + throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a readable" ); + } + reader = new BufferedReader( new FileReader( f ) ); + } + else if ( source instanceof InputStream ) { + reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) ); + } + else if ( source instanceof String ) { + reader = new BufferedReader( new StringReader( ( String ) source ) ); } else if ( source instanceof StringBuffer ) { reader = new BufferedReader( new StringReader( source.toString() ) ); @@ -818,6 +923,15 @@ public final class ForesterUtil { return reader; } + public final static void outOfMemoryError( final OutOfMemoryError e ) { + System.err.println(); + System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" ); + System.err.println(); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); + } + final public static StringBuffer pad( final double number, final int size, final char pad, final boolean left_pad ) { return pad( new StringBuffer( number + "" ), size, pad, left_pad ); } @@ -933,6 +1047,55 @@ public final class ForesterUtil { System.out.println( "[" + prg_name + "] > " + message ); } + /** + * + * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => + * domain with 0.3 is ignored + * + * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored + * + * + * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ + * ignored + * + * @param max_allowed_overlap + * maximal allowed overlap (inclusive) to be still considered not + * overlapping (zero or negative value to allow any overlap) + * @param remove_engulfed_domains + * to remove domains which are completely engulfed by coverage of + * domains with better support + * @param protein + * @return + */ + public static Protein removeOverlappingDomains( final int max_allowed_overlap, + final boolean remove_engulfed_domains, + final Protein protein ) { + final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies() + .getSpeciesId(), protein.getLength() ); + final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); + final List covered_positions = new ArrayList(); + for( final Domain domain : sorted ) { + if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) + && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { + final int covered_positions_size = covered_positions.size(); + for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { + covered_positions.add( false ); + } + final int new_covered_positions_size = covered_positions.size(); + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( i < new_covered_positions_size ) { + covered_positions.set( i, true ); + } + else { + covered_positions.add( true ); + } + } + pruned_protein.addProteinDomain( domain ); + } + } + return pruned_protein; + } + final public static String removeSuffix( final String file_name ) { final int i = file_name.lastIndexOf( '.' ); if ( i > 1 ) { @@ -995,6 +1158,35 @@ public final class ForesterUtil { } } + public final static StringBuilder santitizeStringForNH( String data ) { + data = data.replaceAll( "\\s+", " " ).trim(); + final StringBuilder sb = new StringBuilder(); + if ( data.length() > 0 ) { + final boolean single_pars = data.indexOf( '\'' ) > -1; + final boolean double_pars = data.indexOf( '"' ) > -1; + if ( single_pars && double_pars ) { + data = data.replace( '\'', '`' ); + sb.append( '\'' ); + sb.append( data ); + sb.append( '\'' ); + } + else if ( single_pars ) { + sb.append( '"' ); + sb.append( data ); + sb.append( '"' ); + } + else if ( PARANTHESESABLE_NH_CHARS_PATTERN.matcher( data ).find() ) { + sb.append( '\'' ); + sb.append( data ); + sb.append( '\'' ); + } + else { + sb.append( data ); + } + } + return sb; + } + public static boolean seqIsLikelyToBeAa( final String s ) { final String seq = s.toLowerCase(); if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) @@ -1053,17 +1245,17 @@ public final class ForesterUtil { return str_array; } - final public static void unexpectedFatalError( final Exception e ) { + final public static void unexpectedFatalError( final Error e ) { System.err.println(); - System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." ); + System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } - final public static void unexpectedFatalError( final Error e ) { + final public static void unexpectedFatalError( final Exception e ) { System.err.println(); - System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); + System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); @@ -1188,171 +1380,4 @@ public final class ForesterUtil { final String regex = "[\\s;,]+"; return str.split( regex ); } - - public final static void outOfMemoryError( final OutOfMemoryError e ) { - System.err.println(); - System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" ); - System.err.println(); - e.printStackTrace( System.err ); - System.err.println(); - System.exit( -1 ); - } - - public final static Color obtainColorDependingOnTaxonomyGroup( final String tax_group ) { - if ( !ForesterUtil.isEmpty( tax_group ) ) { - if ( tax_group.equals( TaxonomyGroups.DEUTEROSTOMIA ) ) { - return TaxonomyColors.DEUTEROSTOMIA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.PROTOSTOMIA ) ) { - return TaxonomyColors.PROTOSTOMIA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.CNIDARIA ) ) { - return TaxonomyColors.CNIDARIA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.PLACOZOA ) ) { - return TaxonomyColors.PLACOZOA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.CTENOPHORA ) ) { - return TaxonomyColors.CTENOPHORA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.PORIFERA ) ) { - return TaxonomyColors.PORIFERA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { - return TaxonomyColors.CHOANOFLAGELLIDA; - } - else if ( tax_group.equals( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) ) { - return TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA; - } - else if ( tax_group.equals( TaxonomyGroups.DIKARYA ) ) { - return TaxonomyColors.DIKARYA_COLOR; - } - else if ( tax_group.equalsIgnoreCase( TaxonomyGroups.FUNGI ) - || tax_group.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { - return TaxonomyColors.OTHER_FUNGI_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP ) ) { - return TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.AMOEBOZOA ) ) { - return TaxonomyColors.AMOEBOZOA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.EMBRYOPHYTA ) ) { - return TaxonomyColors.EMBRYOPHYTA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.CHLOROPHYTA ) ) { - return TaxonomyColors.CHLOROPHYTA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.RHODOPHYTA ) ) { - return TaxonomyColors.RHODOPHYTA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.HACROBIA ) ) { - return TaxonomyColors.HACROBIA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) ) { - return TaxonomyColors.GLAUCOPHYTA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.STRAMENOPILES ) ) { - return TaxonomyColors.STRAMENOPILES_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.ALVEOLATA ) ) { - return TaxonomyColors.ALVEOLATA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.RHIZARIA ) ) { - return TaxonomyColors.RHIZARIA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.EXCAVATA ) ) { - return TaxonomyColors.EXCAVATA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.APUSOZOA ) ) { - return TaxonomyColors.APUSOZOA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.ARCHAEA ) ) { - return TaxonomyColors.ARCHAEA_COLOR; - } - else if ( tax_group.equals( TaxonomyGroups.BACTERIA ) ) { - return TaxonomyColors.BACTERIA_COLOR; - } - } - return null; - } - - public final static String obtainNormalizedTaxonomyGroup( final String tax ) { - if ( tax.equalsIgnoreCase( TaxonomyGroups.DEUTEROSTOMIA ) ) { - return TaxonomyGroups.DEUTEROSTOMIA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.PROTOSTOMIA ) ) { - return TaxonomyGroups.PROTOSTOMIA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.CNIDARIA ) ) { - return TaxonomyGroups.CNIDARIA; - } - else if ( tax.toLowerCase().startsWith( "trichoplax" ) || tax.equalsIgnoreCase( TaxonomyGroups.PLACOZOA ) ) { - return TaxonomyGroups.PLACOZOA; - } - else if ( tax.toLowerCase().startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( TaxonomyGroups.CTENOPHORA ) ) { - return TaxonomyGroups.CTENOPHORA; - } - else if ( tax.toLowerCase().startsWith( "amphimedon" ) || tax.equalsIgnoreCase( TaxonomyGroups.PORIFERA ) ) { - return TaxonomyGroups.PORIFERA; - } - else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { - return TaxonomyGroups.CHOANOFLAGELLIDA; - } - else if ( tax.toLowerCase().startsWith( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) - || tax.toLowerCase().startsWith( "ichthyophonida and filasterea" ) - || tax.toLowerCase().startsWith( "ichthyosporea & filasterea" ) - || tax.toLowerCase().startsWith( "ichthyosporea and filasterea" ) ) { - return TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.DIKARYA ) ) { - return TaxonomyGroups.DIKARYA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { - return TaxonomyGroups.OTHER_FUNGI; - } - else if ( tax.toLowerCase().startsWith( "nucleariidae and fonticula" ) ) { - return TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.AMOEBOZOA ) ) { - return TaxonomyGroups.AMOEBOZOA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.EMBRYOPHYTA ) ) { - return TaxonomyGroups.EMBRYOPHYTA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.CHLOROPHYTA ) ) { - return TaxonomyGroups.CHLOROPHYTA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHODOPHYTA ) ) { - return TaxonomyGroups.RHODOPHYTA; - } - else if ( tax.toLowerCase().startsWith( TaxonomyGroups.HACROBIA ) ) { - return TaxonomyGroups.HACROBIA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { - return TaxonomyGroups.GLAUCOCYSTOPHYCEAE; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.STRAMENOPILES ) ) { - return TaxonomyGroups.STRAMENOPILES; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALVEOLATA ) ) { - return TaxonomyGroups.ALVEOLATA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHIZARIA ) ) { - return TaxonomyGroups.RHIZARIA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.EXCAVATA ) ) { - return TaxonomyGroups.EXCAVATA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.APUSOZOA ) ) { - return TaxonomyGroups.APUSOZOA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.ARCHAEA ) ) { - return TaxonomyGroups.ARCHAEA; - } - else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) { - return TaxonomyGroups.BACTERIA; - } - return null; - } }