System.out.println( "failed." );
failed++;
}
- System.exit( 0 );
System.out.print( "Nexus characters parsing: " );
if ( Test.testNexusCharactersParsing() ) {
System.out.println( "OK." );
.equals( "Macrocera sp." ) ) {
return false;
}
- if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp" ) ) {
+ if ( !ParserUtils.extractScientificNameFromNodeName( "Macrocera sp" ).equals( "Macrocera sp." ) ) {
return false;
}
}
if ( !p54.toNewHampshire( NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ).equals( "((A,B)[88],C);" ) ) {
return false;
}
- //
final Phylogeny p55 = factory
.create( new StringBuffer( "((\"lcl|HPV32_L1.:1 s\":0.195593,\"lcl|HPV30_L1.1|;a\":0.114237):0.0359322,\"lcl|HPV56_L1.1|,d\":0.0727412,\"lcl|HPV66_L1.1x\":0.0798012);" ),
new NHXParser() )[ 0 ];
System.out.println( p56.toNewHampshire() );
return false;
}
+ final String s58 = "('Homo \"man\" sapiens:1',\"Homo 'man' sapiens;\")';root \"1_ )';";
+ final Phylogeny p58 = factory.create( new StringBuffer( s58 ), new NHXParser() )[ 0 ];
+ if ( !p58.toNewHampshire().equals( s58 ) ) {
+ System.out.println( p58.toNewHampshire() );
+ return false;
+ }
+ final String s59 = "('Homo \"man sapiens:1',\"Homo 'man sapiens\")\"root; '1_ )\";";
+ final Phylogeny p59 = factory.create( new StringBuffer( s59 ), new NHXParser() )[ 0 ];
+ if ( !p59.toNewHampshire().equals( s59 ) ) {
+ System.out.println( p59.toNewHampshire() );
+ return false;
+ }
+ final String s60 = "('\" ;,:\":\"',\"'abc def' g's_\",'=:0.45+,.:%~`!@#$%^&*()_-+={} | ;,');";
+ final Phylogeny p60 = factory.create( new StringBuffer( s60 ), new NHXParser() )[ 0 ];
+ if ( !p60.toNewHampshire().equals( s60 ) ) {
+ System.out.println( p60.toNewHampshire() );
+ return false;
+ }
+ final String s61 = "('H[omo] \"man\" sapiens:1',\"H[omo] 'man' sapiens;\",H[omo] sapiens)';root \"1_ )';";
+ final Phylogeny p61 = factory.create( new StringBuffer( s61 ), new NHXParser() )[ 0 ];
+ if ( !p61.toNewHampshire()
+ .equals( "('H{omo} \"man\" sapiens:1',\"H{omo} 'man' sapiens;\",Hsapiens)';root \"1_ )';" ) ) {
+ System.out.println( p61.toNewHampshire() );
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
System.out.println( n6.toNewHampshireX() );
return false;
}
+ final PhylogenyNode n7 = new PhylogenyNode();
+ n7.setName( " gks:dr-m4 \" ' `@:[]sadq04 " );
+ if ( !n7.toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS )
+ .equals( "'gks:dr-m4 \" ` `@:[]sadq04'" ) ) {
+ System.out.println( n7
+ .toNewHampshire( true, PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE.IN_SQUARE_BRACKETS ) );
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
if ( !p10.toNewHampshireX().equals( "((A:0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) {
return false;
}
+ final Phylogeny p11 = factory
+ .create( " [79] ( ('A: \" ' [co mment] :0 .2[comment],B:0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],C: 0.1)[comment]root:0.1[100] [comment]",
+ new NHXParser() )[ 0 ];
+ if ( !p11.toNewHampshireX().equals( "(('A: \"':0.2,B:0.3):0.5[&&NHX:B=91],C:0.1)root:0.1[&&NHX:B=100]" ) ) {
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
if ( phy.getNodes( "'single quotes' inside double quotes" ).size() != 1 ) {
return false;
}
- if ( phy.getNodes( "double quotes inside single quotes" ).size() != 1 ) {
+ if ( phy.getNodes( "\"double quotes\" inside single quotes" ).size() != 1 ) {
return false;
}
if ( phy.getNodes( "noquotes" ).size() != 1 ) {
return false;
}
- if ( phy.getNodes( "A ( B C '" ).size() != 1 ) {
+ if ( phy.getNodes( "A ( B C '" ).size() != 1 ) {
return false;
}
final NHXParser p1p = new NHXParser();
final Phylogeny p10 = factory
.create( " [79] ( (\"A \n\tB \" [co mment] :0 .2[comment],'B':0.3[com])[com ment]: 0. 5 \t[ 9 1 ][ comment],'C (or D?\\//;,))': 0.1)[comment]'\nroot is here (cool, was! ) ':0.1[100] [comment]",
new NHXParser() )[ 0 ];
- final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]";
+ final String p10_clean_str = "(('A B':0.2,B:0.3):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]";
if ( !p10.toNewHampshireX().equals( p10_clean_str ) ) {
return false;
}
if ( !p11.toNewHampshireX().equals( p10_clean_str ) ) {
return false;
}
- //
final Phylogeny p12 = factory
.create( " [79] ( (\"A \n\tB \" [[][] :0 .2[comment][\t&\t&\n N\tH\tX:S=mo\tnkey !],'\tB\t\b\t\n\f\rB B ':0.0\b3[])\t[com ment]: 0. 5 \t[ 9 1 ][ \ncomment],'C\t (or D?\\//;,))': 0.\b1)[comment]'\nroot \tis here (cool, \b\t\n\f\r was! ) ':0.1[100] [comment]",
new NHXParser() )[ 0 ];
- final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]";
+ final String p12_clean_str = "(('A B':0.2[&&NHX:S=monkey!],'BB B':0.03):0.5[&&NHX:B=91],'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1[&&NHX:B=100]";
if ( !p12.toNewHampshireX().equals( p12_clean_str ) ) {
return false;
}
if ( !p13.toNewHampshireX().equals( p12_clean_str ) ) {
return false;
}
- final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;";
+ final String p12_clean_str_nh = "(('A B':0.2,'BB B':0.03):0.5,'C (or D?\\//;,))':0.1)'root is here (cool, was! )':0.1;";
if ( !p13.toNewHampshire().equals( p12_clean_str_nh ) ) {
return false;
}
public final class ForesterUtil {
public final static String FILE_SEPARATOR = System.getProperty( "file.separator" );
- public final static String LINE_SEPARATOR = System.getProperty( "line.separator" );
+ public static final NumberFormat FORMATTER_06;
+ public static final NumberFormat FORMATTER_3;
+ public static final NumberFormat FORMATTER_6;
+ public static final NumberFormat FORMATTER_9;
public final static String JAVA_VENDOR = System.getProperty( "java.vendor" );
public final static String JAVA_VERSION = System.getProperty( "java.version" );
+ public final static String LINE_SEPARATOR = System.getProperty( "line.separator" );
+ public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:";
+ public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/";
+ public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/";
+ public static final BigDecimal NULL_BD = new BigDecimal( 0 );
public final static String OS_ARCH = System.getProperty( "os.arch" );
public final static String OS_NAME = System.getProperty( "os.name" );
public final static String OS_VERSION = System.getProperty( "os.version" );
- public final static Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]'\"]" );
- public final static double ZERO_DIFF = 1.0E-9;
- public static final BigDecimal NULL_BD = new BigDecimal( 0 );
- public static final NumberFormat FORMATTER_9;
- public static final NumberFormat FORMATTER_6;
- public static final NumberFormat FORMATTER_06;
- public static final NumberFormat FORMATTER_3;
- public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/";
- public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/";
- public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/";
- public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:";
public static final String PDB = "http://www.pdb.org/pdb/explore/explore.do?pdbId=";
+ public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/";
+ public final static double ZERO_DIFF = 1.0E-9;
+ private static final Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]]" );
static {
final DecimalFormatSymbols dfs = new DecimalFormatSymbols();
dfs.setDecimalSeparator( '.' );
private ForesterUtil() {
}
- public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
- int overlap_count = 0;
- for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
- if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) {
- ++overlap_count;
- }
- }
- return overlap_count;
- }
-
final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) {
if ( sb.length() > 0 ) {
sb.append( separator );
}
/**
- *
- * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 =>
- * domain with 0.3 is ignored
- *
- * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored
- *
- *
- * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_
- * ignored
- *
- * @param max_allowed_overlap
- * maximal allowed overlap (inclusive) to be still considered not
- * overlapping (zero or negative value to allow any overlap)
- * @param remove_engulfed_domains
- * to remove domains which are completely engulfed by coverage of
- * domains with better support
- * @param protein
- * @return
- */
- public static Protein removeOverlappingDomains( final int max_allowed_overlap,
- final boolean remove_engulfed_domains,
- final Protein protein ) {
- final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies()
- .getSpeciesId(), protein.getLength() );
- final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
- final List<Boolean> covered_positions = new ArrayList<Boolean>();
- for( final Domain domain : sorted ) {
- if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) )
- && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) {
- final int covered_positions_size = covered_positions.size();
- for( int i = covered_positions_size; i < domain.getFrom(); ++i ) {
- covered_positions.add( false );
- }
- final int new_covered_positions_size = covered_positions.size();
- for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
- if ( i < new_covered_positions_size ) {
- covered_positions.set( i, true );
- }
- else {
- covered_positions.add( true );
- }
- }
- pruned_protein.addProteinDomain( domain );
- }
- }
- return pruned_protein;
- }
-
- /**
- * Returns true is Domain domain falls in an uninterrupted stretch of
- * covered positions.
- *
- * @param domain
- * @param covered_positions
- * @return
- */
- public static boolean isEngulfed( final Domain domain, final List<Boolean> covered_positions ) {
- for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
- if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) {
- return false;
- }
- }
- return true;
- }
-
- /**
* This calculates a color. If value is equal to min the returned color is
* minColor, if value is equal to max the returned color is maxColor,
* otherwise a color 'proportional' to value is returned.
}
}
+ /**
+  * Counts the positions spanned by a domain, from {@code getFrom()} to
+  * {@code getTo()} (both inclusive), that are already flagged as covered.
+  * Positions at or beyond the end of the list count as not covered.
+  *
+  * @param domain
+  *            the domain whose span is inspected
+  * @param covered_positions
+  *            coverage flags, indexed by position
+  * @return the number of covered positions inside the span of the domain
+  */
+ public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
+     int hits = 0;
+     int pos = domain.getFrom();
+     while ( pos <= domain.getTo() ) {
+         if ( pos < covered_positions.size() ) {
+             if ( covered_positions.get( pos ) ) {
+                 ++hits;
+             }
+         }
+         ++pos;
+     }
+     return hits;
+ }
+
final public static String collapseWhiteSpace( final String s ) {
return s.replaceAll( "[\\s]+", " " );
}
}
}
- public static String[] file2array( final File file ) throws IOException {
- final List<String> list = file2list( file );
- final String[] ary = new String[ list.size() ];
- int i = 0;
- for( final String s : list ) {
- ary[ i++ ] = s;
- }
- return ary;
- }
-
public static String[][] file22dArray( final File file ) throws IOException {
final List<String> list = new ArrayList<String>();
final BufferedReader in = new BufferedReader( new FileReader( file ) );
return ary;
}
+ /**
+  * Returns the entries produced by {@code file2list} for the given file as
+  * an array, in the same order as the list.
+  *
+  * @param file
+  *            the file handed on to {@code file2list}
+  * @return an array with one element per list entry
+  * @throws IOException
+  *             if {@code file2list} throws it
+  */
+ public static String[] file2array( final File file ) throws IOException {
+     final List<String> entries = file2list( file );
+     return entries.toArray( new String[ entries.size() ] );
+ }
+
final public static List<String> file2list( final File file ) throws IOException {
final List<String> list = new ArrayList<String>();
final BufferedReader in = new BufferedReader( new FileReader( file ) );
}
}
- final public static boolean isContainsParanthesesableNhCharacter( final String nh ) {
- return PARANTHESESABLE_NH_CHARS_PATTERN.matcher( nh ).find();
- }
-
final public static boolean isEmpty( final List<?> l ) {
if ( ( l == null ) || l.isEmpty() ) {
return true;
return ( ( s == null ) || ( s.length() < 1 ) );
}
+ /**
+  * Tells whether every position of the domain, from {@code getFrom()} to
+  * {@code getTo()} (both inclusive), is flagged as covered. A position that
+  * lies beyond the end of the list, or whose flag is false, makes the
+  * result false; a span without any position yields true.
+  *
+  * @param domain
+  *            the domain whose span is checked
+  * @param covered_positions
+  *            coverage flags, indexed by position
+  * @return true if no position of the span is uncovered, false otherwise
+  */
+ public static boolean isEngulfed( final Domain domain, final List<Boolean> covered_positions ) {
+     int pos = domain.getFrom();
+     while ( pos <= domain.getTo() ) {
+         final boolean inside_list = pos < covered_positions.size();
+         if ( !inside_list || !covered_positions.get( pos ) ) {
+             return false;
+         }
+         ++pos;
+     }
+     return true;
+ }
+
final public static boolean isEqual( final double a, final double b ) {
return ( ( Math.abs( a - b ) ) < ZERO_DIFF );
}
}
}
+ /**
+  * Reports whether the lower-cased {@code OS_NAME} starts with "mac".
+  * Any exception raised while checking is reported as a warning and
+  * results in false.
+  *
+  * @return true if the operating system name starts with "mac"
+  */
+ public final static boolean isMac() {
+     boolean mac = false;
+     try {
+         final String os = OS_NAME.toLowerCase();
+         mac = os.startsWith( "mac" );
+     }
+     catch ( final Exception e ) {
+         ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e );
+     }
+     return mac;
+ }
+
final public static boolean isNull( final BigDecimal s ) {
return ( ( s == null ) || ( s.compareTo( NULL_BD ) == 0 ) );
}
}
}
- public final static boolean isMac() {
- try {
- return OS_NAME.toLowerCase().startsWith( "mac" );
- }
- catch ( final Exception e ) {
- ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e );
- return false;
- }
- }
-
final public static String isWritableFile( final File f ) {
if ( f.isDirectory() ) {
return "[" + f + "] is a directory";
}
}
- final public static BufferedReader obtainReader( final Object source ) throws IOException, FileNotFoundException {
- BufferedReader reader = null;
- if ( source instanceof File ) {
- final File f = ( File ) source;
- if ( !f.exists() ) {
- throw new IOException( "\"" + f.getAbsolutePath() + "\" does not exist" );
+ public final static Color obtainColorDependingOnTaxonomyGroup( final String tax_group ) { // maps a taxonomy group name to its TaxonomyColors constant; null if the name is empty or matches no group
+ if ( !ForesterUtil.isEmpty( tax_group ) ) { // input for which isEmpty is true skips the whole chain and falls through to "return null"
+ if ( tax_group.equals( TaxonomyGroups.DEUTEROSTOMIA ) ) {
+ return TaxonomyColors.DEUTEROSTOMIA_COLOR;
}
- else if ( !f.isFile() ) {
- throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a file" );
+ else if ( tax_group.equals( TaxonomyGroups.PROTOSTOMIA ) ) {
+ return TaxonomyColors.PROTOSTOMIA_COLOR;
}
- else if ( !f.canRead() ) {
- throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a readable" );
+ else if ( tax_group.equals( TaxonomyGroups.CNIDARIA ) ) {
+ return TaxonomyColors.CNIDARIA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.PLACOZOA ) ) {
+ return TaxonomyColors.PLACOZOA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.CTENOPHORA ) ) {
+ return TaxonomyColors.CTENOPHORA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.PORIFERA ) ) {
+ return TaxonomyColors.PORIFERA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.CHOANOFLAGELLIDA ) ) {
+ return TaxonomyColors.CHOANOFLAGELLIDA; // this color constant carries no _COLOR suffix
+ }
+ else if ( tax_group.equals( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) ) {
+ return TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA; // group and color constants are named differently here
+ }
+ else if ( tax_group.equals( TaxonomyGroups.DIKARYA ) ) {
+ return TaxonomyColors.DIKARYA_COLOR;
+ }
+ else if ( tax_group.equalsIgnoreCase( TaxonomyGroups.FUNGI ) // only this branch ignores case; every other branch uses exact equals()
+ || tax_group.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) {
+ return TaxonomyColors.OTHER_FUNGI_COLOR; // FUNGI and OTHER_FUNGI share one color
+ }
+ else if ( tax_group.equals( TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP ) ) {
+ return TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.AMOEBOZOA ) ) {
+ return TaxonomyColors.AMOEBOZOA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.EMBRYOPHYTA ) ) {
+ return TaxonomyColors.EMBRYOPHYTA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.CHLOROPHYTA ) ) {
+ return TaxonomyColors.CHLOROPHYTA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.RHODOPHYTA ) ) {
+ return TaxonomyColors.RHODOPHYTA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.HACROBIA ) ) {
+ return TaxonomyColors.HACROBIA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) ) {
+ return TaxonomyColors.GLAUCOPHYTA_COLOR; // group and color constants are named differently here
+ }
+ else if ( tax_group.equals( TaxonomyGroups.STRAMENOPILES ) ) {
+ return TaxonomyColors.STRAMENOPILES_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.ALVEOLATA ) ) {
+ return TaxonomyColors.ALVEOLATA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.RHIZARIA ) ) {
+ return TaxonomyColors.RHIZARIA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.EXCAVATA ) ) {
+ return TaxonomyColors.EXCAVATA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.APUSOZOA ) ) {
+ return TaxonomyColors.APUSOZOA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.ARCHAEA ) ) {
+ return TaxonomyColors.ARCHAEA_COLOR;
+ }
+ else if ( tax_group.equals( TaxonomyGroups.BACTERIA ) ) {
+ return TaxonomyColors.BACTERIA_COLOR;
}
- reader = new BufferedReader( new FileReader( f ) );
- }
- else if ( source instanceof InputStream ) {
- reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
- }
- else if ( source instanceof String ) {
- reader = new BufferedReader( new StringReader( ( String ) source ) );
+ }
+ return null; // reached for empty input and for names that match no group
+ }
+
+ /**
+  * Maps a taxonomy group name onto the matching {@code TaxonomyGroups}
+  * constant. Most groups are recognized by case-insensitive equality with
+  * their constant; several are additionally (or only) recognized by an
+  * alias or by a lower-case prefix of the name.
+  *
+  * @param tax
+  *            the name to normalize; may be null or empty
+  * @return the matching {@code TaxonomyGroups} constant, or null if
+  *         {@code tax} is null, empty, or matches no group
+  */
+ public final static String obtainNormalizedTaxonomyGroup( final String tax ) {
+     if ( ForesterUtil.isEmpty( tax ) ) {
+         // Nothing to match; report "no group" instead of failing on null.
+         return null;
+     }
+     // Lower-case once with a fixed locale: under e.g. a Turkish default
+     // locale an upper-case 'I' would become a dotless 'i' and the prefix
+     // comparisons below would silently stop matching.
+     final String lc = tax.toLowerCase( java.util.Locale.ENGLISH );
+     if ( tax.equalsIgnoreCase( TaxonomyGroups.DEUTEROSTOMIA ) ) {
+         return TaxonomyGroups.DEUTEROSTOMIA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.PROTOSTOMIA ) ) {
+         return TaxonomyGroups.PROTOSTOMIA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.CNIDARIA ) ) {
+         return TaxonomyGroups.CNIDARIA;
+     }
+     else if ( lc.startsWith( "trichoplax" ) || tax.equalsIgnoreCase( TaxonomyGroups.PLACOZOA ) ) {
+         return TaxonomyGroups.PLACOZOA;
+     }
+     else if ( lc.startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( TaxonomyGroups.CTENOPHORA ) ) {
+         return TaxonomyGroups.CTENOPHORA;
+     }
+     else if ( lc.startsWith( "amphimedon" ) || tax.equalsIgnoreCase( TaxonomyGroups.PORIFERA ) ) {
+         return TaxonomyGroups.PORIFERA;
+     }
+     else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( TaxonomyGroups.CHOANOFLAGELLIDA ) ) {
+         return TaxonomyGroups.CHOANOFLAGELLIDA;
+     }
+     else if ( lc.startsWith( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA )
+             || lc.startsWith( "ichthyophonida and filasterea" )
+             || lc.startsWith( "ichthyosporea & filasterea" )
+             || lc.startsWith( "ichthyosporea and filasterea" ) ) {
+         return TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.DIKARYA ) ) {
+         return TaxonomyGroups.DIKARYA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) {
+         // Both names normalize to OTHER_FUNGI.
+         return TaxonomyGroups.OTHER_FUNGI;
+     }
+     else if ( lc.startsWith( "nucleariidae and fonticula" ) ) {
+         return TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.AMOEBOZOA ) ) {
+         return TaxonomyGroups.AMOEBOZOA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.EMBRYOPHYTA ) ) {
+         return TaxonomyGroups.EMBRYOPHYTA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.CHLOROPHYTA ) ) {
+         return TaxonomyGroups.CHLOROPHYTA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHODOPHYTA ) ) {
+         return TaxonomyGroups.RHODOPHYTA;
+     }
+     else if ( lc.startsWith( TaxonomyGroups.HACROBIA ) ) {
+         return TaxonomyGroups.HACROBIA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) || tax.equalsIgnoreCase( "glaucophyta" ) ) {
+         return TaxonomyGroups.GLAUCOCYSTOPHYCEAE;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.STRAMENOPILES ) ) {
+         return TaxonomyGroups.STRAMENOPILES;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALVEOLATA ) ) {
+         return TaxonomyGroups.ALVEOLATA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHIZARIA ) ) {
+         return TaxonomyGroups.RHIZARIA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.EXCAVATA ) ) {
+         return TaxonomyGroups.EXCAVATA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.APUSOZOA ) ) {
+         return TaxonomyGroups.APUSOZOA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.ARCHAEA ) ) {
+         return TaxonomyGroups.ARCHAEA;
+     }
+     else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) {
+         return TaxonomyGroups.BACTERIA;
+     }
+     return null;
+ }
+
+ final public static BufferedReader obtainReader( final Object source ) throws IOException, FileNotFoundException {
+ BufferedReader reader = null;
+ if ( source instanceof File ) {
+ final File f = ( File ) source;
+ if ( !f.exists() ) {
+ throw new IOException( "\"" + f.getAbsolutePath() + "\" does not exist" );
+ }
+ else if ( !f.isFile() ) {
+ throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a file" );
+ }
+ else if ( !f.canRead() ) {
+ throw new IOException( "\"" + f.getAbsolutePath() + "\" is not a readable" );
+ }
+ reader = new BufferedReader( new FileReader( f ) );
+ }
+ else if ( source instanceof InputStream ) {
+ reader = new BufferedReader( new InputStreamReader( ( InputStream ) source ) );
+ }
+ else if ( source instanceof String ) {
+ reader = new BufferedReader( new StringReader( ( String ) source ) );
}
else if ( source instanceof StringBuffer ) {
reader = new BufferedReader( new StringReader( source.toString() ) );
return reader;
}
+ /**
+  * Writes a hint about raising the Java heap size, followed by the stack
+  * trace of the given error, to standard error and then terminates the
+  * JVM with exit status -1.
+  *
+  * @param e
+  *            the error whose stack trace is printed
+  */
+ public final static void outOfMemoryError( final OutOfMemoryError e ) {
+     final String hint = "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option";
+     final java.io.PrintStream err = System.err;
+     err.println();
+     err.println( hint );
+     err.println();
+     e.printStackTrace( err );
+     err.println();
+     System.exit( -1 );
+ }
+
final public static StringBuffer pad( final double number, final int size, final char pad, final boolean left_pad ) {
return pad( new StringBuffer( number + "" ), size, pad, left_pad );
}
System.out.println( "[" + prg_name + "] > " + message );
}
+ /**
+ * Builds a new protein that contains only those domains of the given
+ * protein which pass the overlap and engulfment checks. Domains are visited
+ * in the order returned by
+ * SurfacingUtil.sortDomainsWithAscendingConfidenceValues; every accepted
+ * domain marks its positions as covered, which affects the checks applied
+ * to the domains visited after it.
+ *
+ * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 =>
+ * domain with 0.3 is ignored
+ *
+ * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored
+ *
+ *
+ * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_
+ * ignored
+ *
+ * @param max_allowed_overlap
+ * maximal allowed overlap (inclusive) to be still considered not
+ * overlapping (a negative value allows any overlap; zero allows
+ * no overlap at all)
+ * @param remove_engulfed_domains
+ * to remove domains which are completely engulfed by coverage of
+ * domains with better support
+ * @param protein
+ * the protein whose domains are filtered
+ * @return a new protein, created from the id, species id and length of the
+ * given protein, holding the accepted domains
+ */
+ public static Protein removeOverlappingDomains( final int max_allowed_overlap,
+ final boolean remove_engulfed_domains,
+ final Protein protein ) {
+ final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies()
+ .getSpeciesId(), protein.getLength() );
+ final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
+ final List<Boolean> covered_positions = new ArrayList<Boolean>(); // coverage flags, indexed by position
+ for( final Domain domain : sorted ) {
+ if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) // overlap test is skipped only for a negative limit
+ && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) {
+ final int covered_positions_size = covered_positions.size();
+ for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { // pad with "not covered" up to the start of the domain
+ covered_positions.add( false );
+ }
+ final int new_covered_positions_size = covered_positions.size();
+ for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { // flag the span of the domain as covered
+ if ( i < new_covered_positions_size ) {
+ covered_positions.set( i, true );
+ }
+ else {
+ covered_positions.add( true ); // position lies past the end of the list: grow it
+ }
+ }
+ pruned_protein.addProteinDomain( domain );
+ }
+ }
+ return pruned_protein;
+ }
+
final public static String removeSuffix( final String file_name ) {
final int i = file_name.lastIndexOf( '.' );
if ( i > 1 ) {
}
}
+ /**
+  * Prepares a name for NH output: runs of whitespace are collapsed to a
+  * single space, the result is trimmed and, where necessary, quoted.
+  * <ul>
+  * <li>contains both quote kinds: single quotes become back-ticks and the
+  * name is wrapped in single quotes</li>
+  * <li>contains single quotes only: wrapped in double quotes</li>
+  * <li>contains double quotes, or a character matched by
+  * {@code PARANTHESESABLE_NH_CHARS_PATTERN}: wrapped in single quotes</li>
+  * <li>otherwise: returned unquoted</li>
+  * </ul>
+  *
+  * @param data
+  *            the name to sanitize; must not be null
+  * @return the sanitized name; empty if nothing remains after trimming
+  */
+ public final static StringBuilder santitizeStringForNH( String data ) {
+     data = data.replaceAll( "\\s+", " " ).trim();
+     final StringBuilder sb = new StringBuilder();
+     if ( data.length() > 0 ) {
+         final boolean single_pars = data.indexOf( '\'' ) > -1;
+         final boolean double_pars = data.indexOf( '"' ) > -1;
+         if ( single_pars && double_pars ) {
+             // Neither quote kind can delimit the name as is; free up the single quote.
+             data = data.replace( '\'', '`' );
+             sb.append( '\'' );
+             sb.append( data );
+             sb.append( '\'' );
+         }
+         else if ( single_pars ) {
+             sb.append( '"' );
+             sb.append( data );
+             sb.append( '"' );
+         }
+         else if ( double_pars || PARANTHESESABLE_NH_CHARS_PATTERN.matcher( data ).find() ) {
+             // The pattern does not cover quote characters, so a name whose only
+             // special character is a double quote has to be caught explicitly;
+             // otherwise it would be written out unquoted.
+             sb.append( '\'' );
+             sb.append( data );
+             sb.append( '\'' );
+         }
+         else {
+             sb.append( data );
+         }
+     }
+     return sb;
+ }
+
public static boolean seqIsLikelyToBeAa( final String s ) {
final String seq = s.toLowerCase();
if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 )
return str_array;
}
- final public static void unexpectedFatalError( final Exception e ) {
+ final public static void unexpectedFatalError( final Error e ) {
System.err.println();
- System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." );
+ System.err.println( "unexpected error: should not have occured! Please contact program author(s)." );
e.printStackTrace( System.err );
System.err.println();
System.exit( -1 );
}
- final public static void unexpectedFatalError( final Error e ) {
+ final public static void unexpectedFatalError( final Exception e ) {
System.err.println();
- System.err.println( "unexpected error: should not have occured! Please contact program author(s)." );
+ System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." );
e.printStackTrace( System.err );
System.err.println();
System.exit( -1 );
final String regex = "[\\s;,]+";
return str.split( regex );
}
-
- public final static void outOfMemoryError( final OutOfMemoryError e ) {
- System.err.println();
- System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" );
- System.err.println();
- e.printStackTrace( System.err );
- System.err.println();
- System.exit( -1 );
- }
-
- public final static Color obtainColorDependingOnTaxonomyGroup( final String tax_group ) {
- if ( !ForesterUtil.isEmpty( tax_group ) ) {
- if ( tax_group.equals( TaxonomyGroups.DEUTEROSTOMIA ) ) {
- return TaxonomyColors.DEUTEROSTOMIA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.PROTOSTOMIA ) ) {
- return TaxonomyColors.PROTOSTOMIA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.CNIDARIA ) ) {
- return TaxonomyColors.CNIDARIA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.PLACOZOA ) ) {
- return TaxonomyColors.PLACOZOA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.CTENOPHORA ) ) {
- return TaxonomyColors.CTENOPHORA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.PORIFERA ) ) {
- return TaxonomyColors.PORIFERA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.CHOANOFLAGELLIDA ) ) {
- return TaxonomyColors.CHOANOFLAGELLIDA;
- }
- else if ( tax_group.equals( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) ) {
- return TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA;
- }
- else if ( tax_group.equals( TaxonomyGroups.DIKARYA ) ) {
- return TaxonomyColors.DIKARYA_COLOR;
- }
- else if ( tax_group.equalsIgnoreCase( TaxonomyGroups.FUNGI )
- || tax_group.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) {
- return TaxonomyColors.OTHER_FUNGI_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP ) ) {
- return TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.AMOEBOZOA ) ) {
- return TaxonomyColors.AMOEBOZOA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.EMBRYOPHYTA ) ) {
- return TaxonomyColors.EMBRYOPHYTA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.CHLOROPHYTA ) ) {
- return TaxonomyColors.CHLOROPHYTA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.RHODOPHYTA ) ) {
- return TaxonomyColors.RHODOPHYTA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.HACROBIA ) ) {
- return TaxonomyColors.HACROBIA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) ) {
- return TaxonomyColors.GLAUCOPHYTA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.STRAMENOPILES ) ) {
- return TaxonomyColors.STRAMENOPILES_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.ALVEOLATA ) ) {
- return TaxonomyColors.ALVEOLATA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.RHIZARIA ) ) {
- return TaxonomyColors.RHIZARIA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.EXCAVATA ) ) {
- return TaxonomyColors.EXCAVATA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.APUSOZOA ) ) {
- return TaxonomyColors.APUSOZOA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.ARCHAEA ) ) {
- return TaxonomyColors.ARCHAEA_COLOR;
- }
- else if ( tax_group.equals( TaxonomyGroups.BACTERIA ) ) {
- return TaxonomyColors.BACTERIA_COLOR;
- }
- }
- return null;
- }
-
- public final static String obtainNormalizedTaxonomyGroup( final String tax ) {
- if ( tax.equalsIgnoreCase( TaxonomyGroups.DEUTEROSTOMIA ) ) {
- return TaxonomyGroups.DEUTEROSTOMIA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.PROTOSTOMIA ) ) {
- return TaxonomyGroups.PROTOSTOMIA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.CNIDARIA ) ) {
- return TaxonomyGroups.CNIDARIA;
- }
- else if ( tax.toLowerCase().startsWith( "trichoplax" ) || tax.equalsIgnoreCase( TaxonomyGroups.PLACOZOA ) ) {
- return TaxonomyGroups.PLACOZOA;
- }
- else if ( tax.toLowerCase().startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( TaxonomyGroups.CTENOPHORA ) ) {
- return TaxonomyGroups.CTENOPHORA;
- }
- else if ( tax.toLowerCase().startsWith( "amphimedon" ) || tax.equalsIgnoreCase( TaxonomyGroups.PORIFERA ) ) {
- return TaxonomyGroups.PORIFERA;
- }
- else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( TaxonomyGroups.CHOANOFLAGELLIDA ) ) {
- return TaxonomyGroups.CHOANOFLAGELLIDA;
- }
- else if ( tax.toLowerCase().startsWith( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA )
- || tax.toLowerCase().startsWith( "ichthyophonida and filasterea" )
- || tax.toLowerCase().startsWith( "ichthyosporea & filasterea" )
- || tax.toLowerCase().startsWith( "ichthyosporea and filasterea" ) ) {
- return TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.DIKARYA ) ) {
- return TaxonomyGroups.DIKARYA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) {
- return TaxonomyGroups.OTHER_FUNGI;
- }
- else if ( tax.toLowerCase().startsWith( "nucleariidae and fonticula" ) ) {
- return TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.AMOEBOZOA ) ) {
- return TaxonomyGroups.AMOEBOZOA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.EMBRYOPHYTA ) ) {
- return TaxonomyGroups.EMBRYOPHYTA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.CHLOROPHYTA ) ) {
- return TaxonomyGroups.CHLOROPHYTA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHODOPHYTA ) ) {
- return TaxonomyGroups.RHODOPHYTA;
- }
- else if ( tax.toLowerCase().startsWith( TaxonomyGroups.HACROBIA ) ) {
- return TaxonomyGroups.HACROBIA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) || tax.equalsIgnoreCase( "glaucophyta" ) ) {
- return TaxonomyGroups.GLAUCOCYSTOPHYCEAE;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.STRAMENOPILES ) ) {
- return TaxonomyGroups.STRAMENOPILES;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALVEOLATA ) ) {
- return TaxonomyGroups.ALVEOLATA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHIZARIA ) ) {
- return TaxonomyGroups.RHIZARIA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.EXCAVATA ) ) {
- return TaxonomyGroups.EXCAVATA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.APUSOZOA ) ) {
- return TaxonomyGroups.APUSOZOA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.ARCHAEA ) ) {
- return TaxonomyGroups.ARCHAEA;
- }
- else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) {
- return TaxonomyGroups.BACTERIA;
- }
- return null;
- }
}