// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.util;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.forester.archaeopteryx.Constants;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Distribution;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
+import org.forester.protein.BasicProtein;
+import org.forester.protein.Domain;
+import org.forester.protein.Protein;
+import org.forester.surfacing.SurfacingUtil;
public final class ForesterUtil {
public static final NumberFormat FORMATTER_6;
public static final NumberFormat FORMATTER_06;
public static final NumberFormat FORMATTER_3;
+ public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; // URL prefixes; presumably an identifier is appended by callers -- confirm at call sites
+ public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/";
+ public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/";
+ public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:";
+ public final static Color DEUTEROSTOMIA_COLOR = new Color( 255, 0, 0 ); // colors handed out by obtainColorDependingOnTaxonomyGroup( String ); presumably java.awt.Color( r, g, b ) -- import not visible here
+ public final static Color PROTOSTOMIA_COLOR = new Color( 204, 0, 0 );
+ public final static Color METAZOA_COLOR = new Color( 204, 0, 102 );
+ public final static Color HOLOZOA_COLOR = new Color( 127, 0, 255 );
+ public final static Color FUNGI_COLOR = new Color( 255, 153, 0 );
+ public final static Color HOLOMYCOTA_COLOR = new Color( 204, 102, 0 );
+ public final static Color AMOEBOZOA_COLOR = new Color( 255, 0, 255 );
+ public final static Color VIRIDPLANTAE_COLOR = new Color( 0, 255, 0 ); // NOTE(review): constant is spelled VIRIDPLANTAE while the lookup key is "viridiplantae"; renaming would break the public API
+ public final static Color RHODOPHYTA_COLOR = new Color( 0, 153, 76 );
+ public final static Color HACROBIA_COLOR = new Color( 0, 102, 51 );
+ public final static Color GLAUCOPHYTA_COLOR = new Color( 0, 102, 51 ); // NOTE(review): same components as HACROBIA_COLOR -- confirm this is intended
+ public final static Color STRAMENOPILES_COLOR = new Color( 0, 0, 255 );
+ public final static Color ALVEOLATA_COLOR = new Color( 0, 128, 255 );
+ public final static Color RHIZARIA_COLOR = new Color( 0, 255, 255 );
+ public static final Color APUSOZOA_COLOR = new Color( 204, 255, 255 );
+ public final static Color EXCAVATA_COLOR = new Color( 204, 204, 0 );
+ public final static Color ARCHAEA_COLOR = new Color( 160, 160, 160 );
+ public final static Color BACTERIA_COLOR = new Color( 64, 64, 64 );
static {
final DecimalFormatSymbols dfs = new DecimalFormatSymbols();
dfs.setDecimalSeparator( '.' );
private ForesterUtil() {
}
- public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) {
- if ( !node.getNodeData().isHasTaxonomy() ) {
- node.getNodeData().setTaxonomy( new Taxonomy() );
- }
- }
-
- public static void ensurePresenceOfSequence( final PhylogenyNode node ) {
- if ( !node.getNodeData().isHasSequence() ) {
- node.getNodeData().setSequence( new Sequence() );
- }
- }
-
- final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) {
- if ( !node.getNodeData().isHasDistribution() ) {
- node.getNodeData().setDistribution( new Distribution( "" ) );
- }
- }
-
- final public static void ensurePresenceOfDate( final PhylogenyNode node ) {
- if ( !node.getNodeData().isHasDate() ) {
- node.getNodeData().setDate( new org.forester.phylogeny.data.Date() );
+ public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) { // returns how many positions of domain are flagged true in covered_positions
+ int overlap_count = 0;
+ for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { // both getFrom() and getTo() are included
+ if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { // indices beyond the end of the list count as not covered
+ ++overlap_count;
+ }
}
+ return overlap_count;
}
final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) {
}
}
- public static boolean isWindowns() {
- return ForesterUtil.OS_NAME.toLowerCase().indexOf( "win" ) > -1;
- }
-
- final public static String getForesterLibraryInformation() {
- return "forester " + ForesterConstants.FORESTER_VERSION + " (" + ForesterConstants.FORESTER_DATE + ")";
+ /**
+ * Returns a new BasicProtein (same protein id, species id and length as
+ * protein) holding only those domains of protein that pass the overlap and
+ * engulfment filters.
+ *
+ * Domains are visited in the order returned by
+ * SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein )
+ * (presumably best supported first -- confirm against SurfacingUtil). A
+ * domain is kept if its overlap with the positions covered by previously
+ * kept domains does not exceed max_allowed_overlap and, when
+ * remove_engulfed_domains is set, it is not completely engulfed by them.
+ * The positions getFrom() to getTo() (both inclusive) of every kept domain
+ * are then marked as covered.
+ *
+ * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 =>
+ * domain with 0.3 is ignored
+ *
+ * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored
+ *
+ *
+ * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_
+ * ignored
+ *
+ * @param max_allowed_overlap
+ * maximal allowed overlap (inclusive) to be still considered not
+ * overlapping; a negative value disables the overlap check, zero
+ * tolerates no overlap at all
+ * @param remove_engulfed_domains
+ * to remove domains which are completely engulfed by coverage of
+ * domains with better support
+ * @param protein
+ * the protein whose domains are to be filtered
+ * @return a new protein containing the retained domains
+ */
+ public static Protein removeOverlappingDomains( final int max_allowed_overlap,
+ final boolean remove_engulfed_domains,
+ final Protein protein ) {
+ final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies()
+ .getSpeciesId(), protein.getLength() );
+ final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
+ final List<Boolean> covered_positions = new ArrayList<Boolean>(); // index = sequence position, true = covered by an already kept domain
+ for( final Domain domain : sorted ) {
+ if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) )
+ && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) {
+ final int covered_positions_size = covered_positions.size();
+ for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { // pad with "not covered" up to the start of this domain
+ covered_positions.add( false );
+ }
+ final int new_covered_positions_size = covered_positions.size(); // size after padding, i.e. at least domain.getFrom()
+ for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
+ if ( i < new_covered_positions_size ) {
+ covered_positions.set( i, true );
+ }
+ else {
+ covered_positions.add( true ); // i grows by one per step, so each append lands exactly at index i
+ }
+ }
+ pruned_protein.addProteinDomain( domain );
+ }
+ }
+ return pruned_protein;
}
- public static boolean seqIsLikelyToBeAa( final String s ) {
- final String seq = s.toLowerCase();
- if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 )
- || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 )
- || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 )
- || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) {
- return true;
+ /**
+ * Returns true if every position of domain, from getFrom() to getTo()
+ * (both inclusive), is flagged as covered, i.e. the domain lies inside an
+ * uninterrupted stretch of covered positions. Positions beyond the end of
+ * covered_positions count as not covered.
+ *
+ * @param domain
+ * the domain to test
+ * @param covered_positions
+ * coverage flags indexed by sequence position
+ * @return true if all positions of domain are covered, false otherwise
+ */
+ public static boolean isEngulfed( final Domain domain, final List<Boolean> covered_positions ) {
+ for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
+ if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) {
+ return false;
+ }
}
- return false;
+ return true;
}
/**
}
}
- /**
- * Helper method for calcColor methods.
- *
- * @param smallercolor_component_x
- * color component the smaller color
- * @param largercolor_component_x
- * color component the larger color
- * @param x
- * factor
- * @return an int representing a color component
- */
- final private static int calculateColorComponent( final double smallercolor_component_x,
- final double largercolor_component_x,
- final double x ) {
- return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) );
- }
-
- /**
- * Helper method for calcColor methods.
- *
- *
- * @param value
- * the value
- * @param larger
- * the largest value
- * @param smaller
- * the smallest value
- * @return a normalized value between larger and smaller
- */
- final private static double calculateColorFactor( final double value, final double larger, final double smaller ) {
- return ( 255.0 * ( value - smaller ) ) / ( larger - smaller );
- }
-
final public static String collapseWhiteSpace( final String s ) {
return s.replaceAll( "[\\s]+", " " );
}
return new BufferedWriter( new FileWriter( file ) );
}
+    /**
+     * Opens a buffered character writer on the file with the given name.
+     * The target file is obtained from createFileForWriting( name ) first.
+     *
+     * @param name
+     *            name of the file to write to
+     * @return a BufferedWriter wrapping a FileWriter on that file
+     * @throws IOException
+     *             propagated from createFileForWriting or from FileWriter
+     */
+    final public static BufferedWriter createBufferedWriter( final String name ) throws IOException {
+        final File target = createFileForWriting( name );
+        final FileWriter raw = new FileWriter( target );
+        return new BufferedWriter( raw );
+    }
+
final public static EasyWriter createEasyWriter( final File file ) throws IOException {
return new EasyWriter( createBufferedWriter( file ) );
}
return createEasyWriter( createFileForWriting( name ) );
}
- final public static BufferedWriter createBufferedWriter( final String name ) throws IOException {
- return new BufferedWriter( new FileWriter( createFileForWriting( name ) ) );
- }
-
final public static File createFileForWriting( final String name ) throws IOException {
final File file = new File( name );
if ( file.exists() ) {
return file;
}
+    /**
+     * Attaches a new, default-constructed Date to the data of node unless
+     * isHasDate() already reports one.
+     *
+     * @param node
+     *            the node to check
+     */
+    final public static void ensurePresenceOfDate( final PhylogenyNode node ) {
+        if ( node.getNodeData().isHasDate() ) {
+            return;
+        }
+        node.getNodeData().setDate( new org.forester.phylogeny.data.Date() );
+    }
+
+    /**
+     * Attaches a new Distribution built from the empty string to the data of
+     * node unless isHasDistribution() already reports one.
+     *
+     * @param node
+     *            the node to check
+     */
+    final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) {
+        if ( node.getNodeData().isHasDistribution() ) {
+            return;
+        }
+        node.getNodeData().setDistribution( new Distribution( "" ) );
+    }
+
+    /**
+     * Attaches a new, default-constructed Sequence to the data of node unless
+     * isHasSequence() already reports one.
+     *
+     * @param node
+     *            the node to check
+     */
+    public static void ensurePresenceOfSequence( final PhylogenyNode node ) {
+        if ( node.getNodeData().isHasSequence() ) {
+            return;
+        }
+        node.getNodeData().setSequence( new Sequence() );
+    }
+
+    /**
+     * Attaches a new, default-constructed Taxonomy to the data of node unless
+     * isHasTaxonomy() already reports one.
+     *
+     * @param node
+     *            the node to check
+     */
+    public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) {
+        if ( node.getNodeData().isHasTaxonomy() ) {
+            return;
+        }
+        node.getNodeData().setTaxonomy( new Taxonomy() );
+    }
+
+    /**
+     * Prints "error: " followed by message to standard error, framed by one
+     * empty line before and after, then terminates the JVM with exit status -1.
+     *
+     * @param message
+     *            the text to report
+     */
+    public static void fatalError( final String message ) {
+        final String report = "error: " + message;
+        System.err.println();
+        System.err.println( report );
+        System.err.println();
+        System.exit( -1 );
+    }
+
public static void fatalError( final String prg_name, final String message ) {
System.err.println();
System.err.println( "[" + prg_name + "] > " + message );
System.exit( -1 );
}
+    /**
+     * Terminates the program through fatalError( String ) when
+     * isReadableFile( file ) yields a non-empty message; returns normally
+     * otherwise.
+     *
+     * @param file
+     *            the file to check
+     */
+    public static void fatalErrorIfFileNotReadable( final File file ) {
+        final String problem = isReadableFile( file );
+        if ( isEmpty( problem ) ) {
+            return;
+        }
+        // Same report as the inlined version: blank line, "error: " + problem, blank line, exit( -1 ).
+        fatalError( problem );
+    }
+
public static void fatalErrorIfFileNotReadable( final String prg_name, final File file ) {
final String error = isReadableFile( file );
if ( !isEmpty( error ) ) {
return ary;
}
+    /**
+     * Reads a two-column text file into an array with one row per data line.
+     * Each line is trimmed; empty lines and lines starting with '#' are
+     * skipped. Every remaining line must consist of exactly two
+     * whitespace-free tokens separated by whitespace.
+     *
+     * @param file
+     *            the file to read
+     * @return an array of rows, [i][0] being the first and [i][1] the second
+     *         token of the i-th accepted line
+     * @throws IOException
+     *             if reading fails or an accepted line does not have the
+     *             two-token format
+     */
+    public static String[][] file22dArray( final File file ) throws IOException {
+        final List<String> list = new ArrayList<String>();
+        final BufferedReader in = new BufferedReader( new FileReader( file ) );
+        try {
+            String str;
+            while ( ( str = in.readLine() ) != null ) {
+                str = str.trim();
+                if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) {
+                    list.add( str );
+                }
+            }
+        }
+        finally {
+            // Release the file handle even if readLine() fails part-way through.
+            in.close();
+        }
+        final String[][] ary = new String[ list.size() ][ 2 ];
+        final Pattern pa = Pattern.compile( "(\\S+)\\s+(\\S+)" );
+        int i = 0;
+        for( final String s : list ) {
+            final Matcher m = pa.matcher( s );
+            if ( m.matches() ) {
+                ary[ i ][ 0 ] = m.group( 1 );
+                ary[ i ][ 1 ] = m.group( 2 );
+                ++i;
+            }
+            else {
+                throw new IOException( "unexpcted format: " + s );
+            }
+        }
+        return ary;
+    }
+
final public static List<String> file2list( final File file ) throws IOException {
final List<String> list = new ArrayList<String>();
final BufferedReader in = new BufferedReader( new FileReader( file ) );
return line;
}
+    /**
+     * Builds the library banner "forester VERSION (DATE)" from
+     * ForesterConstants.FORESTER_VERSION and ForesterConstants.FORESTER_DATE.
+     *
+     * @return the banner string
+     */
+    final public static String getForesterLibraryInformation() {
+        final StringBuilder banner = new StringBuilder( "forester " );
+        banner.append( ForesterConstants.FORESTER_VERSION );
+        banner.append( " (" );
+        banner.append( ForesterConstants.FORESTER_DATE );
+        banner.append( ")" );
+        return banner.toString();
+    }
+
final public static String getLineSeparator() {
return ForesterUtil.LINE_SEPARATOR;
}
return isReadableFile( new File( s ) );
}
+    /**
+     * Reports whether the lower-cased OS_NAME contains the substring "win".
+     * Any exception raised while inspecting OS_NAME is reported as a warning
+     * and answered with false.
+     *
+     * @return true if OS_NAME contains "win" (case-insensitively)
+     */
+    public final static boolean isWindows() {
+        boolean windows = false;
+        try {
+            windows = OS_NAME.toLowerCase().contains( "win" );
+        }
+        catch ( final Exception e ) {
+            ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e );
+        }
+        return windows;
+    }
+
+    /**
+     * Reports whether the lower-cased OS_NAME starts with "mac". Any exception
+     * raised while inspecting OS_NAME is reported as a warning and answered
+     * with false.
+     *
+     * @return true if OS_NAME starts with "mac" (case-insensitively)
+     */
+    public final static boolean isMac() {
+        boolean mac = false;
+        try {
+            final String os = OS_NAME.toLowerCase();
+            mac = os.startsWith( "mac" );
+        }
+        catch ( final Exception e ) {
+            ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e );
+        }
+        return mac;
+    }
+
final public static String isWritableFile( final File f ) {
if ( f.isDirectory() ) {
return "[" + f + "] is a directory";
}
final public static void printProgramInformation( final String prg_name,
+ final String prg_version,
+ final String date,
+ final String email,
+ final String www ) {
+ printProgramInformation( prg_name, null, prg_version, date, email, www, null );
+ }
+
+ final public static void printProgramInformation( final String prg_name,
final String desc,
final String prg_version,
final String date,
System.out.println();
}
- final public static void printProgramInformation( final String prg_name,
- final String prg_version,
- final String date,
- final String email,
- final String www ) {
- printProgramInformation( prg_name, null, prg_version, date, email, www, null );
- }
-
final public static void printWarningMessage( final String prg_name, final String message ) {
System.out.println( "[" + prg_name + "] > warning: " + message );
}
}
}
- final private static String[] splitString( final String str ) {
- final String regex = "[\\s;,]+";
- return str.split( regex );
+    /**
+     * Returns true if s contains, in either case, at least one of the letters
+     * r, d, e, q, h, k, w, s, m, p or v.
+     *
+     * @param s
+     *            the sequence string to inspect
+     * @return true if one of the listed letters occurs in s
+     */
+    public static boolean seqIsLikelyToBeAa( final String s ) {
+        final String seq = s.toLowerCase();
+        final String indicators = "rdeqhkwsmpv";
+        for( int i = 0; i < indicators.length(); ++i ) {
+            if ( seq.indexOf( indicators.charAt( i ) ) > -1 ) {
+                return true;
+            }
+        }
+        return false;
}
final public static String stringArrayToString( final String[] a ) {
return str_array;
}
+    /**
+     * Reports an unexpected exception on standard error (notice line followed
+     * by the stack trace of e) and terminates the JVM with exit status -1.
+     *
+     * @param e
+     *            the exception to report
+     */
+    final public static void unexpectedFatalError( final Exception e ) {
+        final String notice = "unexpected exception: should not have occured! Please contact program author(s).";
+        System.err.println();
+        System.err.println( notice );
+        e.printStackTrace( System.err );
+        System.err.println();
+        System.exit( -1 );
+    }
+
+    /**
+     * Reports an unexpected Error on standard error (notice line followed by
+     * the stack trace of e) and terminates the JVM with exit status -1.
+     *
+     * @param e
+     *            the error to report
+     */
+    final public static void unexpectedFatalError( final Error e ) {
+        final String notice = "unexpected error: should not have occured! Please contact program author(s).";
+        System.err.println();
+        System.err.println( notice );
+        e.printStackTrace( System.err );
+        System.err.println();
+        System.exit( -1 );
+    }
+
+    /**
+     * Reports an unexpected error on standard error (notice line followed by
+     * message) and terminates the JVM with exit status -1.
+     *
+     * @param message
+     *            additional text printed after the notice line
+     */
+    final public static void unexpectedFatalError( final String message ) {
+        final String notice = "unexpected error: should not have occured! Please contact program author(s).";
+        System.err.println();
+        System.err.println( notice );
+        System.err.println( message );
+        System.err.println();
+        System.exit( -1 );
+    }
+
final public static void unexpectedFatalError( final String prg_name, final Exception e ) {
System.err.println();
System.err.println( "[" + prg_name
- + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+ + "] > unexpected error; should not have occured! Please contact program author(s)." );
e.printStackTrace( System.err );
System.err.println();
System.exit( -1 );
final public static void unexpectedFatalError( final String prg_name, final String message ) {
System.err.println();
System.err.println( "[" + prg_name
- + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+ + "] > unexpected error: should not have occured! Please contact program author(s)." );
System.err.println( message );
System.err.println();
System.exit( -1 );
final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) {
System.err.println();
System.err.println( "[" + prg_name
- + "] > Unexpected error. Should not have occured! Please contact program author(s)." );
+ + "] > unexpected error: should not have occured! Please contact program author(s)." );
System.err.println( message );
e.printStackTrace( System.err );
System.err.println();
System.exit( -1 );
}
+    /**
+     * Redraws a text progress bar on standard output: a carriage return, "[",
+     * 50 cells of "." (done) and " " (remaining), then "]".
+     * <p>
+     * Despite its name, progress_percentage is used as a fraction: it is
+     * multiplied by the bar width, so 1.0 corresponds to a full bar. At least
+     * one dot is drawn for any value that rounds to zero or more cells, and
+     * the number of dots is capped at the bar width so that the closing
+     * bracket stays in the same column at 100% (previously width + 1 dots
+     * were printed there).
+     *
+     * @param progress_percentage
+     *            progress as a fraction of the whole
+     */
+    public final static void updateProgress( final double progress_percentage ) {
+        final int width = 50;
+        // Rounded once up front instead of on every loop iteration.
+        final int filled = ForesterUtil.roundToInt( progress_percentage * width );
+        // Cells 0..filled are dotted (inclusive), but never more than width cells in total.
+        final int dots = Math.min( filled, width - 1 ) + 1;
+        System.out.print( "\r[" );
+        int i = 0;
+        for( ; i < dots; i++ ) {
+            System.out.print( "." );
+        }
+        for( ; i < width; i++ ) {
+            System.out.print( " " );
+        }
+        System.out.print( "]" );
+    }
+
+    /**
+     * Redraws a numeric progress counter on standard output: a carriage
+     * return followed by i, formatted with f, in square brackets.
+     *
+     * @param i
+     *            the value to display
+     * @param f
+     *            the format applied to i
+     */
+    public final static void updateProgress( final int i, final DecimalFormat f ) {
+        final String counter = f.format( i );
+        System.out.print( "\r[" + counter + "]" );
+    }
+
public final static String wordWrap( final String str, final int width ) {
final StringBuilder sb = new StringBuilder( str );
int start = 0;
}
return sb.toString();
}
+
+    /**
+     * Helper method for calcColor methods: interpolates one color component
+     * between the component of the smaller and of the larger color.
+     *
+     * @param smallercolor_component_x
+     *            color component of the smaller color
+     * @param largercolor_component_x
+     *            color component of the larger color
+     * @param x
+     *            factor; it is divided by 255.0 before being applied to the
+     *            difference of the two components
+     * @return the interpolated component, truncated to an int
+     */
+    final private static int calculateColorComponent( final double smallercolor_component_x,
+                                                      final double largercolor_component_x,
+                                                      final double x ) {
+        final double span = largercolor_component_x - smallercolor_component_x;
+        final double offset = ( x * span ) / 255.0;
+        return ( int ) ( smallercolor_component_x + offset );
+    }
+
+    /**
+     * Helper method for calcColor methods: maps value linearly so that
+     * smaller yields 0.0 and larger yields 255.0. No clamping is applied, and
+     * the result is not finite when larger equals smaller (division by zero
+     * on doubles).
+     *
+     * @param value
+     *            the value
+     * @param larger
+     *            the largest value
+     * @param smaller
+     *            the smallest value
+     * @return 255.0 * ( value - smaller ) / ( larger - smaller )
+     */
+    final private static double calculateColorFactor( final double value, final double larger, final double smaller ) {
+        final double range = larger - smaller;
+        final double scaled = 255.0 * ( value - smaller );
+        return scaled / range;
+    }
+
+    /**
+     * Splits str at every run of whitespace, semicolons and/or commas.
+     *
+     * @param str
+     *            the string to split
+     * @return the pieces, as produced by String.split
+     */
+    final private static String[] splitString( final String str ) {
+        return str.split( "[\\s;,]+" );
+    }
+
+    /**
+     * Prints a hint about increasing the Java heap size and the stack trace
+     * of e to standard error, then terminates the JVM with exit status -1.
+     *
+     * @param e
+     *            the OutOfMemoryError to report
+     */
+    public final static void outOfMemoryError( final OutOfMemoryError e ) {
+        final String hint = "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option";
+        System.err.println();
+        System.err.println( hint );
+        System.err.println();
+        e.printStackTrace( System.err );
+        System.err.println();
+        System.exit( -1 );
+    }
+
+    /**
+     * Looks up the display color for a taxonomy group name. Names are matched
+     * case-insensitively and as a whole, except "hacrobia", which matches as
+     * a prefix; "glaucocystophyceae" and "glaucophyta" share one color.
+     *
+     * @param tax
+     *            the taxonomy group name
+     * @return the color constant for the group, or null if tax is not one of
+     *         the known names
+     */
+    public final static Color obtainColorDependingOnTaxonomyGroup( final String tax ) {
+        // Every branch returns, so the checks are written as a flat sequence in the original order.
+        if ( tax.equalsIgnoreCase( "deuterostomia" ) ) {
+            return DEUTEROSTOMIA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "protostomia" ) ) {
+            return PROTOSTOMIA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "metazoa" ) ) {
+            return METAZOA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "holozoa" ) ) {
+            return HOLOZOA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "fungi" ) ) {
+            return FUNGI_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "holomycota" ) ) {
+            return HOLOMYCOTA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "amoebozoa" ) ) {
+            return AMOEBOZOA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "viridiplantae" ) ) {
+            return VIRIDPLANTAE_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "rhodophyta" ) ) {
+            return RHODOPHYTA_COLOR;
+        }
+        if ( tax.toLowerCase().startsWith( "hacrobia" ) ) {
+            return HACROBIA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "glaucocystophyceae" ) || tax.equalsIgnoreCase( "glaucophyta" ) ) {
+            return GLAUCOPHYTA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "stramenopiles" ) ) {
+            return STRAMENOPILES_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "alveolata" ) ) {
+            return ALVEOLATA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "rhizaria" ) ) {
+            return RHIZARIA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "excavata" ) ) {
+            return EXCAVATA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "apusozoa" ) ) {
+            return APUSOZOA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "archaea" ) ) {
+            return ARCHAEA_COLOR;
+        }
+        if ( tax.equalsIgnoreCase( "bacteria" ) ) {
+            return BACTERIA_COLOR;
+        }
+        return null;
+    }
}