X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Futil%2FForesterUtil.java;h=a8c81b0dd23ae214f2280b324a659ae3f3f544f2;hb=aec065f948a075773794133f102ea19eb1d59f64;hp=ef302002c9f3bd1d637b361d739750841dfb4998;hpb=08bd889bc13de7415319ea7202d8afb27879c18d;p=jalview.git diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index ef30200..a8c81b0 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; @@ -59,6 +59,7 @@ import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.phylogeny.PhylogenyNode; @@ -82,6 +83,14 @@ public final class ForesterUtil { public static final NumberFormat FORMATTER_6; public static final NumberFormat FORMATTER_06; public static final NumberFormat FORMATTER_3; + public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; + public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; + public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; + public final static Pattern UNIPROT_KB_PATTERN_1 = Pattern + .compile( "(?:\\b|_)(?:sp|tr)[\\.|\\-_=/\\\\]([A-Z][0-9][A-Z0-9]{3}[0-9])(?:\\b|_)" ); + public final static Pattern UNIPROT_KB_PATTERN_2 = Pattern + .compile( "\\b(?:[A-Z0-9]{2,5}|(?:[A-Z][0-9][A-Z0-9]{3}[0-9]))_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA)\\b" ); + public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); dfs.setDecimalSeparator( '.' ); @@ -95,28 +104,122 @@ public final class ForesterUtil { private ForesterUtil() { } - public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasTaxonomy() ) { - node.getNodeData().setTaxonomy( new Taxonomy() ); + public static String extractRefSeqAccessorAccessor( final PhylogenyNode node ) { + String v = null; + if ( node.getNodeData().isHasSequence() ) { + final Sequence seq = node.getNodeData().getSequence(); + if ( !isEmpty( seq.getSymbol() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( seq.getSymbol() ); + } + if ( isEmpty( v ) && !isEmpty( seq.getName() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( seq.getName() ); + } + if ( isEmpty( v ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !isEmpty( seq.getAccession().getValue() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( seq.getAccession().getValue() ); + } } + if ( isEmpty( v ) && !isEmpty( node.getName() ) ) { + v = SequenceIdParser.parseRefSeqAccessor( node.getName() ); + } + return v; } - public static void ensurePresenceOfSequence( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasSequence() ) { - node.getNodeData().setSequence( new Sequence() ); + public static String extractGenbankAccessor( final PhylogenyNode node ) { + String v = null; + if ( node.getNodeData().isHasSequence() ) { + final Sequence seq = node.getNodeData().getSequence(); + if ( !isEmpty( seq.getSymbol() ) ) { + v = SequenceIdParser.parseGenbankAccessor( seq.getSymbol() ); + } + if ( isEmpty( v ) && !isEmpty( seq.getName() ) ) { + v = SequenceIdParser.parseGenbankAccessor( seq.getName() ); + } + if ( isEmpty( v ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !isEmpty( seq.getAccession().getValue() ) ) { + v = SequenceIdParser.parseGenbankAccessor( seq.getAccession().getValue() ); + } + } + if ( isEmpty( v ) && !isEmpty( node.getName() ) ) { + v = SequenceIdParser.parseGenbankAccessor( node.getName() ); } + return v; } - final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasDistribution() ) { - node.getNodeData().setDistribution( new Distribution( "" ) ); + public static String extractGInumber( final PhylogenyNode node ) { + String v = null; + if ( node.getNodeData().isHasSequence() ) { + final Sequence seq = node.getNodeData().getSequence(); + if ( isEmpty( v ) && !isEmpty( seq.getName() ) ) { + v = SequenceIdParser.parseGInumber( seq.getName() ); + } + if ( isEmpty( v ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !isEmpty( seq.getAccession().getValue() ) ) { + v = SequenceIdParser.parseGInumber( seq.getAccession().getValue() ); + } + } + if ( isEmpty( v ) && !isEmpty( node.getName() ) ) { + v = SequenceIdParser.parseGInumber( node.getName() ); } + return v; } - final public static void ensurePresenceOfDate( final PhylogenyNode node ) { - if ( !node.getNodeData().isHasDate() ) { - node.getNodeData().setDate( new org.forester.phylogeny.data.Date() ); + public static String extractUniProtKbProteinSeqIdentifier( final PhylogenyNode node ) { + String upkb = null; + if ( node.getNodeData().isHasSequence() ) { + final Sequence seq = node.getNodeData().getSequence(); + Matcher m; + if ( !isEmpty( seq.getSymbol() ) ) { + m = UNIPROT_KB_PATTERN_1.matcher( seq.getSymbol() ); + if ( m.find() ) { + upkb = m.group( 1 ); + } + else { + m = UNIPROT_KB_PATTERN_2.matcher( seq.getSymbol() ); + if ( m.find() ) { + upkb = m.group(); + } + } + } + if ( isEmpty( upkb ) && !isEmpty( seq.getName() ) ) { + m = UNIPROT_KB_PATTERN_1.matcher( seq.getName() ); + if ( m.find() ) { + upkb = m.group( 1 ); + } + else { + m = UNIPROT_KB_PATTERN_2.matcher( seq.getName() ); + if ( m.find() ) { + upkb = m.group(); + } + } + } + if ( isEmpty( upkb ) && ( node.getNodeData().getSequence().getAccession() != null ) + && !isEmpty( seq.getAccession().getValue() ) ) { + m = UNIPROT_KB_PATTERN_1.matcher( seq.getAccession().getValue() ); + if ( m.find() ) { + upkb = m.group( 1 ); + } + else { + m = UNIPROT_KB_PATTERN_2.matcher( seq.getAccession().getValue() ); + if ( m.find() ) { + upkb = m.group(); + } + } + } } + if ( isEmpty( upkb ) && !isEmpty( node.getName() ) ) { + final Matcher m1 = UNIPROT_KB_PATTERN_1.matcher( node.getName() ); + if ( m1.find() ) { + upkb = m1.group( 1 ); + } + else { + final Matcher m2 = UNIPROT_KB_PATTERN_2.matcher( node.getName() ); + if ( m2.find() ) { + upkb = m2.group(); + } + } + } + return upkb; } final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { @@ -125,25 +228,6 @@ public final class ForesterUtil { } } - public static boolean isWindowns() { - return ForesterUtil.OS_NAME.toLowerCase().indexOf( "win" ) > -1; - } - - final public static String getForesterLibraryInformation() { - return "forester " + ForesterConstants.FORESTER_VERSION + " (" + ForesterConstants.FORESTER_DATE + ")"; - } - - public static boolean seqIsLikelyToBeAa( final String s ) { - final String seq = s.toLowerCase(); - if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) - || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 ) - || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 ) - || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) { - return true; - } - return false; - } - /** * This calculates a color. If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, @@ -234,39 +318,6 @@ public final class ForesterUtil { } } - /** - * Helper method for calcColor methods. - * - * @param smallercolor_component_x - * color component the smaller color - * @param largercolor_component_x - * color component the larger color - * @param x - * factor - * @return an int representing a color component - */ - final private static int calculateColorComponent( final double smallercolor_component_x, - final double largercolor_component_x, - final double x ) { - return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); - } - - /** - * Helper method for calcColor methods. - * - * - * @param value - * the value - * @param larger - * the largest value - * @param smaller - * the smallest value - * @return a normalized value between larger and smaller - */ - final private static double calculateColorFactor( final double value, final double larger, final double smaller ) { - return ( 255.0 * ( value - smaller ) ) / ( larger - smaller ); - } - final public static String collapseWhiteSpace( final String s ) { return s.replaceAll( "[\\s]+", " " ); } @@ -337,6 +388,10 @@ public final class ForesterUtil { return new BufferedWriter( new FileWriter( file ) ); } + final public static BufferedWriter createBufferedWriter( final String name ) throws IOException { + return new BufferedWriter( new FileWriter( createFileForWriting( name ) ) ); + } + final public static EasyWriter createEasyWriter( final File file ) throws IOException { return new EasyWriter( createBufferedWriter( file ) ); } @@ -345,10 +400,6 @@ public final class ForesterUtil { return createEasyWriter( createFileForWriting( name ) ); } - final public static BufferedWriter createBufferedWriter( final String name ) throws IOException { - return new BufferedWriter( new FileWriter( createFileForWriting( name ) ) ); - } - final public static File createFileForWriting( final String name ) throws IOException { final File file = new File( name ); if ( file.exists() ) { @@ -357,6 +408,37 @@ public final class ForesterUtil { return file; } + final public static void ensurePresenceOfDate( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasDate() ) { + node.getNodeData().setDate( new org.forester.phylogeny.data.Date() ); + } + } + + final public static void ensurePresenceOfDistribution( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasDistribution() ) { + node.getNodeData().setDistribution( new Distribution( "" ) ); + } + } + + public static void ensurePresenceOfSequence( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasSequence() ) { + node.getNodeData().setSequence( new Sequence() ); + } + } + + public static void ensurePresenceOfTaxonomy( final PhylogenyNode node ) { + if ( !node.getNodeData().isHasTaxonomy() ) { + node.getNodeData().setTaxonomy( new Taxonomy() ); + } + } + + public static void fatalError( final String message ) { + System.err.println(); + System.err.println( "error: " + message ); + System.err.println(); + System.exit( -1 ); + } + public static void fatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name + "] > " + message ); @@ -364,6 +446,16 @@ public final class ForesterUtil { System.exit( -1 ); } + public static void fatalErrorIfFileNotReadable( final File file ) { + final String error = isReadableFile( file ); + if ( !isEmpty( error ) ) { + System.err.println(); + System.err.println( "error: " + error ); + System.err.println(); + System.exit( -1 ); + } + } + public static void fatalErrorIfFileNotReadable( final String prg_name, final File file ) { final String error = isReadableFile( file ); if ( !isEmpty( error ) ) { @@ -471,6 +563,10 @@ public final class ForesterUtil { return line; } + final public static String getForesterLibraryInformation() { + return "forester " + ForesterConstants.FORESTER_VERSION + " (" + ForesterConstants.FORESTER_DATE + ")"; + } + final public static String getLineSeparator() { return ForesterUtil.LINE_SEPARATOR; } @@ -587,6 +683,10 @@ public final class ForesterUtil { return isReadableFile( new File( s ) ); } + public static boolean isWindowns() { + return ForesterUtil.OS_NAME.toLowerCase().indexOf( "win" ) > -1; + } + final public static String isWritableFile( final File f ) { if ( f.isDirectory() ) { return "[" + f + "] is a directory"; @@ -785,6 +885,14 @@ public final class ForesterUtil { } final public static void printProgramInformation( final String prg_name, + final String prg_version, + final String date, + final String email, + final String www ) { + printProgramInformation( prg_name, null, prg_version, date, email, www, null ); + } + + final public static void printProgramInformation( final String prg_name, final String desc, final String prg_version, final String date, @@ -815,14 +923,6 @@ public final class ForesterUtil { System.out.println(); } - final public static void printProgramInformation( final String prg_name, - final String prg_version, - final String date, - final String email, - final String www ) { - printProgramInformation( prg_name, null, prg_version, date, email, www, null ); - } - final public static void printWarningMessage( final String prg_name, final String message ) { System.out.println( "[" + prg_name + "] > warning: " + message ); } @@ -900,9 +1000,15 @@ public final class ForesterUtil { } } - final private static String[] splitString( final String str ) { - final String regex = "[\\s;,]+"; - return str.split( regex ); + public static boolean seqIsLikelyToBeAa( final String s ) { + final String seq = s.toLowerCase(); + if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) + || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 ) + || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 ) + || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) { + return true; + } + return false; } final public static String stringArrayToString( final String[] a ) { @@ -952,10 +1058,34 @@ public final class ForesterUtil { return str_array; } + final public static void unexpectedFatalError( final Exception e ) { + System.err.println(); + System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." ); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); + } + + final public static void unexpectedFatalError( final Error e ) { + System.err.println(); + System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); + } + + final public static void unexpectedFatalError( final String message ) { + System.err.println(); + System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); + System.err.println( message ); + System.err.println(); + System.exit( -1 ); + } + final public static void unexpectedFatalError( final String prg_name, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error (Should not have occured! Please contact program author(s).)" ); + + "] > unexpected error; should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); @@ -964,7 +1094,7 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error. Should not have occured! Please contact program author(s)." ); + + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); System.err.println(); System.exit( -1 ); @@ -973,13 +1103,30 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error. Should not have occured! Please contact program author(s)." ); + + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } + public final static void updateProgress( final double progress_percentage ) { + final int width = 50; + System.out.print( "\r[" ); + int i = 0; + for( ; i <= ForesterUtil.roundToInt( progress_percentage * width ); i++ ) { + System.out.print( "." ); + } + for( ; i < width; i++ ) { + System.out.print( " " ); + } + System.out.print( "]" ); + } + + public final static void updateProgress( final int i, final DecimalFormat f ) { + System.out.print( "\r[" + f.format( i ) + "]" ); + } + public final static String wordWrap( final String str, final int width ) { final StringBuilder sb = new StringBuilder( str ); int start = 0; @@ -1008,4 +1155,51 @@ public final class ForesterUtil { } return sb.toString(); } + + /** + * Helper method for calcColor methods. + * + * @param smallercolor_component_x + * color component the smaller color + * @param largercolor_component_x + * color component the larger color + * @param x + * factor + * @return an int representing a color component + */ + final private static int calculateColorComponent( final double smallercolor_component_x, + final double largercolor_component_x, + final double x ) { + return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); + } + + /** + * Helper method for calcColor methods. + * + * + * @param value + * the value + * @param larger + * the largest value + * @param smaller + * the smallest value + * @return a normalized value between larger and smaller + */ + final private static double calculateColorFactor( final double value, final double larger, final double smaller ) { + return ( 255.0 * ( value - smaller ) ) / ( larger - smaller ); + } + + final private static String[] splitString( final String str ) { + final String regex = "[\\s;,]+"; + return str.split( regex ); + } + + public final static void outOfMemoryError( final OutOfMemoryError e ) { + System.err.println(); + System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" ); + System.err.println(); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); + } }