X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Futil%2FForesterUtil.java;h=dc899760248f8295c912babcf4a7b776c1f56f2b;hb=917fdd32a745f77a8b70d7779e44b8b36e11185f;hp=b68d89b24e88aa9cc13f295c220bf5fbf62c8064;hpb=c4f9dc6343e1fee8846c893b968065d9d9178655;p=jalview.git diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index b68d89b..dc89976 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.util; @@ -34,12 +34,17 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; +import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.StringReader; +import java.io.Writer; import java.math.BigDecimal; import java.net.URL; +import java.net.URLConnection; +import java.security.KeyManagementException; +import java.security.NoSuchAlgorithmException; import java.text.DateFormat; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; @@ -47,11 +52,11 @@ import java.text.NumberFormat; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Collection; import java.util.Date; -import java.util.Hashtable; -import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; @@ -60,41 +65,42 @@ import java.util.TreeSet; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.forester.archaeopteryx.AptxConstants; import org.forester.io.parsers.PhylogenyParser; -import org.forester.io.parsers.nexus.NexusPhylogeniesParser; -import org.forester.io.parsers.nhx.NHXParser; -import org.forester.io.parsers.phyloxml.PhyloXmlParser; -import org.forester.io.parsers.phyloxml.PhyloXmlUtil; -import org.forester.io.parsers.tol.TolParser; -import org.forester.io.parsers.util.PhylogenyParserException; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Distribution; -import org.forester.phylogeny.data.Identifier; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; -import org.forester.phylogeny.factories.PhylogenyFactory; -import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.protein.BasicProtein; +import org.forester.protein.Domain; +import org.forester.protein.Protein; +import org.forester.sequence.MolecularSequence; +import org.forester.sequence.MolecularSequence.TYPE; +import org.forester.surfacing.SurfacingUtil; public final class ForesterUtil { public final static String FILE_SEPARATOR = System.getProperty( "file.separator" ); - public final static String LINE_SEPARATOR = System.getProperty( "line.separator" ); + public static final NumberFormat FORMATTER_06; + public static final NumberFormat FORMATTER_3; + public static final NumberFormat FORMATTER_6; + public static final NumberFormat FORMATTER_9; public final static String JAVA_VENDOR = System.getProperty( "java.vendor" ); public final static String JAVA_VERSION = System.getProperty( "java.version" ); + public final static String LINE_SEPARATOR = System.getProperty( "line.separator" ); + public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; + public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; + public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; + public static final BigDecimal NULL_BD = new BigDecimal( 0 ); public final static String OS_ARCH = System.getProperty( "os.arch" ); public final static String OS_NAME = System.getProperty( "os.name" ); public final static String OS_VERSION = System.getProperty( "os.version" ); - public final static Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s]" ); - public final static double ZERO_DIFF = 1.0E-9; - public static final BigDecimal NULL_BD = new BigDecimal( 0 ); - public static final NumberFormat FORMATTER_9; - public static final NumberFormat FORMATTER_6; - public static final NumberFormat FORMATTER_06; - public static final NumberFormat FORMATTER_3; + public static final String PDB = "http://www.pdb.org/pdb/explore/explore.do?pdbId="; + public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; + public final static double ZERO_DIFF = 1.0E-12; + private static final Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]]" ); static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); dfs.setDecimalSeparator( '.' ); @@ -105,26 +111,30 @@ public final class ForesterUtil { FORMATTER_3 = new DecimalFormat( "#.###", dfs ); } - private ForesterUtil() { - } - final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { if ( sb.length() > 0 ) { sb.append( separator ); } } + final public static String removeFileExtension( final String file_name ) { + if ( file_name.indexOf( "." ) > 0 ) { + return file_name.substring( 0, file_name.lastIndexOf( "." ) ); + } + return file_name; + } + /** * This calculates a color. If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, * otherwise a color 'proportional' to value is returned. - * + * * @param value - * the value + * the value * @param min - * the smallest value + * the smallest value * @param max - * the largest value + * the largest value * @param minColor * the color for min * @param maxColor @@ -155,15 +165,15 @@ public final class ForesterUtil { * value is equal to mean the returned color is meanColor, otherwise a color * 'proportional' to value is returned -- either between min-mean or * mean-max - * + * * @param value * the value * @param min * the smallest value * @param max - * the largest value + * the largest value * @param mean - * the mean/median value + * the mean/median value * @param minColor * the color for min * @param maxColor @@ -206,7 +216,7 @@ public final class ForesterUtil { /** * Helper method for calcColor methods. - * + * * @param smallercolor_component_x * color component the smaller color * @param largercolor_component_x @@ -218,13 +228,14 @@ public final class ForesterUtil { final private static int calculateColorComponent( final double smallercolor_component_x, final double largercolor_component_x, final double x ) { - return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); + return ( int ) ( smallercolor_component_x + + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); } /** * Helper method for calcColor methods. - * - * + * + * * @param value * the value * @param larger @@ -237,10 +248,41 @@ public final class ForesterUtil { return ( 255.0 * ( value - smaller ) ) / ( larger - smaller ); } + public static int calculateOverlap( final Domain domain, final List covered_positions ) { + int overlap_count = 0; + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { + ++overlap_count; + } + } + return overlap_count; + } + final public static String collapseWhiteSpace( final String s ) { return s.replaceAll( "[\\s]+", " " ); } + final public static void collection2file( final File file, final Collection data, final String separator ) + throws IOException { + final Writer writer = new BufferedWriter( new FileWriter( file ) ); + collection2writer( writer, data, separator ); + writer.close(); + } + + final public static void collection2writer( final Writer writer, final Collection data, final String separator ) + throws IOException { + boolean first = true; + for( final Object object : data ) { + if ( !first ) { + writer.write( separator ); + } + else { + first = false; + } + writer.write( object.toString() ); + } + } + final public static String colorToHex( final Color color ) { final String rgb = Integer.toHexString( color.getRGB() ); return rgb.substring( 2, rgb.length() ); @@ -290,6 +332,14 @@ public final class ForesterUtil { return new BufferedWriter( new FileWriter( createFileForWriting( name ) ) ); } + final public static EasyWriter createEasyWriter( final File file ) throws IOException { + return new EasyWriter( createBufferedWriter( file ) ); + } + + final public static BufferedWriter createEasyWriter( final String name ) throws IOException { + return createEasyWriter( createFileForWriting( name ) ); + } + final public static File createFileForWriting( final String name ) throws IOException { final File file = new File( name ); if ( file.exists() ) { @@ -298,127 +348,6 @@ public final class ForesterUtil { return file; } - final public static PhylogenyParser createParserDependingFileContents( final File file, - final boolean phyloxml_validate_against_xsd ) - throws FileNotFoundException, IOException { - PhylogenyParser parser = null; - final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase(); - if ( first_line.startsWith( "<" ) ) { - parser = new PhyloXmlParser(); - if ( phyloxml_validate_against_xsd ) { - final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); - final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); - if ( xsd_url != null ) { - ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); - } - else { - if ( ForesterConstants.RELEASE ) { - throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" - + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); - } - } - } - } - else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) - || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { - parser = new NexusPhylogeniesParser(); - } - else { - parser = new NHXParser(); - } - return parser; - } - - final public static PhylogenyParser createParserDependingOnFileType( final File file, - final boolean phyloxml_validate_against_xsd ) - throws FileNotFoundException, IOException { - PhylogenyParser parser = null; - parser = createParserDependingOnSuffix( file.getName(), phyloxml_validate_against_xsd ); - if ( parser == null ) { - parser = createParserDependingFileContents( file, phyloxml_validate_against_xsd ); - } - return parser; - } - - /** - * Return null if it can not guess the parser to use based on name suffix. - * - * @param filename - * @return - */ - final public static PhylogenyParser createParserDependingOnSuffix( final String filename, - final boolean phyloxml_validate_against_xsd ) { - PhylogenyParser parser = null; - final String filename_lc = filename.toLowerCase(); - if ( filename_lc.endsWith( ".tol" ) || filename_lc.endsWith( ".tolxml" ) || filename_lc.endsWith( ".tol.zip" ) ) { - parser = new TolParser(); - } - else if ( filename_lc.endsWith( ".xml" ) || filename_lc.endsWith( ".px" ) || filename_lc.endsWith( "phyloxml" ) - || filename_lc.endsWith( ".zip" ) ) { - parser = new PhyloXmlParser(); - if ( phyloxml_validate_against_xsd ) { - final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); - final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); - if ( xsd_url != null ) { - ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); - } - else { - if ( ForesterConstants.RELEASE ) { - throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" - + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); - } - } - } - } - else if ( filename_lc.endsWith( ".nexus" ) || filename_lc.endsWith( ".nex" ) || filename_lc.endsWith( ".nx" ) ) { - parser = new NexusPhylogeniesParser(); - } - else if ( filename_lc.endsWith( ".nhx" ) || filename_lc.endsWith( ".nh" ) || filename_lc.endsWith( ".newick" ) ) { - parser = new NHXParser(); - } - return parser; - } - - final public static PhylogenyParser createParserDependingOnUrlContents( final URL url, - final boolean phyloxml_validate_against_xsd ) - throws FileNotFoundException, IOException { - final String lc_filename = url.getFile().toString().toLowerCase(); - PhylogenyParser parser = createParserDependingOnSuffix( lc_filename, phyloxml_validate_against_xsd ); - if ( ( parser != null ) && lc_filename.endsWith( ".zip" ) ) { - if ( parser instanceof PhyloXmlParser ) { - ( ( PhyloXmlParser ) parser ).setZippedInputstream( true ); - } - else if ( parser instanceof TolParser ) { - ( ( TolParser ) parser ).setZippedInputstream( true ); - } - } - if ( parser == null ) { - final String first_line = getFirstLine( url ).trim().toLowerCase(); - if ( first_line.startsWith( "<" ) ) { - parser = new PhyloXmlParser(); - if ( phyloxml_validate_against_xsd ) { - final ClassLoader cl = PhyloXmlParser.class.getClassLoader(); - final URL xsd_url = cl.getResource( ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE ); - if ( xsd_url != null ) { - ( ( PhyloXmlParser ) parser ).setValidateAgainstSchema( xsd_url.toString() ); - } - else { - throw new RuntimeException( "failed to get URL for phyloXML XSD from jar file from [" - + ForesterConstants.LOCAL_PHYLOXML_XSD_RESOURCE + "]" ); - } - } - } - else if ( ( first_line.startsWith( "nexus" ) ) || ( first_line.startsWith( "#nexus" ) ) - || ( first_line.startsWith( "# nexus" ) ) || ( first_line.startsWith( "begin" ) ) ) { - parser = new NexusPhylogeniesParser(); - } - else { - parser = new NHXParser(); - } - } - return parser; - } - final public static void ensurePresenceOfDate( final PhylogenyNode node ) { if ( !node.getNodeData().isHasDate() ) { node.getNodeData().setDate( new org.forester.phylogeny.data.Date() ); @@ -443,56 +372,11 @@ public final class ForesterUtil { } } - /** - * Extracts a code if and only if: - * one and only one _, - * shorter than 25, - * no |, - * no ., - * if / present it has to be after the _, - * if PFAM_STYLE_ONLY: / must be present, - * tax code can only contain uppercase letters and numbers, - * and must contain at least one uppercase letter. - * Return null if no code extractable. - * - * @param name - * @param limit_to_five - * @return - */ - public static String extractTaxonomyCodeFromNodeName( final String name, - final boolean limit_to_five, - final ForesterUtil.TAXONOMY_EXTRACTION taxonomy_extraction ) { - if ( ( name.indexOf( "_" ) > 0 ) - && ( name.length() < 25 ) - && ( name.lastIndexOf( "_" ) == name.indexOf( "_" ) ) - && ( name.indexOf( "|" ) < 0 ) - && ( name.indexOf( "." ) < 0 ) - && ( ( taxonomy_extraction != ForesterUtil.TAXONOMY_EXTRACTION.PFAM_STYLE_ONLY ) || ( name - .indexOf( "/" ) >= 0 ) ) - && ( ( ( name.indexOf( "/" ) ) < 0 ) || ( name.indexOf( "/" ) > name.indexOf( "_" ) ) ) ) { - final String[] s = name.split( "[_/]" ); - if ( s.length > 1 ) { - String str = s[ 1 ]; - if ( limit_to_five ) { - if ( str.length() > 5 ) { - str = str.substring( 0, 5 ); - } - else if ( ( str.length() < 5 ) && ( str.startsWith( "RAT" ) || str.startsWith( "PIG" ) ) ) { - str = str.substring( 0, 3 ); - } - } - final Matcher letters_and_numbers = NHXParser.UC_LETTERS_NUMBERS_PATTERN.matcher( str ); - if ( !letters_and_numbers.matches() ) { - return null; - } - final Matcher numbers_only = NHXParser.NUMBERS_ONLY_PATTERN.matcher( str ); - if ( numbers_only.matches() ) { - return null; - } - return str; - } - } - return null; + public static void fatalError( final String message ) { + System.err.println(); + System.err.println( "error: " + message ); + System.err.println(); + System.exit( -1 ); } public static void fatalError( final String prg_name, final String message ) { @@ -502,6 +386,54 @@ public final class ForesterUtil { System.exit( -1 ); } + public static void fatalErrorIfFileNotReadable( final File file ) { + final String error = isReadableFile( file ); + if ( !isEmpty( error ) ) { + System.err.println(); + System.err.println( "error: " + error ); + System.err.println(); + System.exit( -1 ); + } + } + + public static void fatalErrorIfFileNotReadable( final String prg_name, final File file ) { + final String error = isReadableFile( file ); + if ( !isEmpty( error ) ) { + System.err.println(); + System.err.println( "[" + prg_name + "] > " + error ); + System.err.println(); + System.exit( -1 ); + } + } + + public static String[][] file22dArray( final File file ) throws IOException { + final List list = new ArrayList(); + final BufferedReader in = new BufferedReader( new FileReader( file ) ); + String str; + while ( ( str = in.readLine() ) != null ) { + str = str.trim(); + if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) { + list.add( str ); + } + } + in.close(); + final String[][] ary = new String[ list.size() ][ 2 ]; + final Pattern pa = Pattern.compile( "(\\S+)\\s+(\\S+)" ); + int i = 0; + for( final String s : list ) { + final Matcher m = pa.matcher( s ); + if ( m.matches() ) { + ary[ i ][ 0 ] = m.group( 1 ); + ary[ i ][ 1 ] = m.group( 2 ); + ++i; + } + else { + throw new IOException( "unexpcted format: " + s ); + } + } + return ary; + } + public static String[] file2array( final File file ) throws IOException { final List list = file2list( file ); final String[] ary = new String[ list.size() ]; @@ -578,7 +510,9 @@ public final class ForesterUtil { reader = new BufferedReader( new StringReader( source.toString() ) ); } else if ( source instanceof URL ) { - reader = new BufferedReader( new InputStreamReader( ( ( URL ) source ).openStream() ) ); + final URLConnection url_connection = ( ( URL ) source ).openConnection(); + url_connection.setDefaultUseCaches( false ); + reader = new BufferedReader( new InputStreamReader( url_connection.getInputStream() ) ); } else { throw new IllegalArgumentException( "dont know how to read [" + source.getClass() + "]" ); @@ -599,29 +533,28 @@ public final class ForesterUtil { return line; } + final public static String getForesterLibraryInformation() { + return "forester " + ForesterConstants.FORESTER_VERSION + " (" + ForesterConstants.FORESTER_DATE + ")"; + } + final public static String getLineSeparator() { return ForesterUtil.LINE_SEPARATOR; } - /** - * Returns all custom data tag names of this Phylogeny as Hashtable. Tag - * names are keys, values are Boolean set to false. - */ - final public static Hashtable getPropertyRefs( final Phylogeny phylogeny ) { - final Hashtable ht = new Hashtable(); - if ( phylogeny.isEmpty() ) { - return ht; - } - for( final PhylogenyNodeIterator iter = phylogeny.iteratorPreorder(); iter.hasNext(); ) { - final PhylogenyNode current_node = iter.next(); - if ( current_node.getNodeData().isHasProperties() ) { - final String[] tags = current_node.getNodeData().getProperties().getPropertyRefs(); - for( int i = 0; i < tags.length; ++i ) { - ht.put( tags[ i ], new Boolean( false ) ); - } + final public static MolecularSequence.TYPE guessMolecularSequenceType( final String mol_seq ) { + if ( mol_seq.contains( "L" ) || mol_seq.contains( "I" ) || mol_seq.contains( "E" ) || mol_seq.contains( "H" ) + || mol_seq.contains( "D" ) || mol_seq.contains( "Q" ) ) { + return TYPE.AA; + } + else { + if ( mol_seq.contains( "T" ) ) { + return TYPE.DNA; + } + else if ( mol_seq.contains( "U" ) ) { + return TYPE.RNA; } } - return ht; + return null; } final public static void increaseCountingMap( final Map counting_map, final String item_name ) { @@ -633,32 +566,6 @@ public final class ForesterUtil { } } - final static public boolean isAllNonEmptyInternalLabelsArePositiveNumbers( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isRoot() && !n.isExternal() ) { - if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double d = -1.0; - try { - d = Double.parseDouble( n.getName() ); - } - catch ( final Exception e ) { - d = -1.0; - } - if ( d < 0.0 ) { - return false; - } - } - } - } - return true; - } - - final public static boolean isContainsParanthesesableNhCharacter( final String nh ) { - return PARANTHESESABLE_NH_CHARS_PATTERN.matcher( nh ).find(); - } - final public static boolean isEmpty( final List l ) { if ( ( l == null ) || l.isEmpty() ) { return true; @@ -687,55 +594,47 @@ public final class ForesterUtil { return ( ( s == null ) || ( s.length() < 1 ) ); } - final public static boolean isEqual( final double a, final double b ) { - return ( ( Math.abs( a - b ) ) < ZERO_DIFF ); - } - - final public static boolean isEven( final int n ) { - return n % 2 == 0; - } - - final static public boolean isHasAtLeastNodeWithEvent( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - if ( it.next().getNodeData().isHasEvent() ) { - return true; - } + final public static boolean isEmptyTrimmed( final String s ) { + if ( s == null ) { + return true; } - return false; + return ( ( s.trim().length() < 1 ) ); } /** - * Returns true if at least one branch has a length larger than zero. - * - * - * @param phy + * Returns true is Domain domain falls in an uninterrupted stretch of + * covered positions. + * + * @param domain + * @param covered_positions + * @return */ - final static public boolean isHasAtLeastOneBranchLengthLargerThanZero( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - if ( it.next().getDistanceToParent() > 0.0 ) { - return true; + public static boolean isEngulfed( final Domain domain, final List covered_positions ) { + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { + return false; } } - return false; + return true; } - final static public boolean isHasAtLeastOneBranchWithSupportValues( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - if ( it.next().getBranchData().isHasConfidences() ) { - return true; - } - } - return false; + final public static boolean isEqual( final double a, final double b ) { + return ( ( Math.abs( a - b ) ) < ZERO_DIFF ); + } + + final public static boolean isEqual( final double a, final double b, final double tolerance ) { + return ( ( Math.abs( a - b ) ) < tolerance ); + } + + final public static boolean isEven( final int n ) { + return ( n % 2 ) == 0; } /** * This determines whether String[] a and String[] b have at least one * String in common (intersect). Returns false if at least one String[] is * null or empty. - * + * * @param a * a String[] b a String[] * @return true if both a and b or not empty or null and contain at least @@ -748,10 +647,9 @@ public final class ForesterUtil { if ( ( a.length < 1 ) || ( b.length < 1 ) ) { return false; } - for( int i = 0; i < a.length; ++i ) { - final String ai = a[ i ]; - for( int j = 0; j < b.length; ++j ) { - if ( ( ai != null ) && ( b[ j ] != null ) && ai.equals( b[ j ] ) ) { + for( final String ai : a ) { + for( final String element : b ) { + if ( ( ai != null ) && ( element != null ) && ai.equals( element ) ) { return true; } } @@ -768,6 +666,16 @@ public final class ForesterUtil { } } + public final static boolean isMac() { + try { + return OS_NAME.toLowerCase().startsWith( "mac" ); + } + catch ( final Exception e ) { + ForesterUtil.printWarningMessage( AptxConstants.PRG_NAME, "minor error: " + e ); + return false; + } + } + final public static boolean isNull( final BigDecimal s ) { return ( ( s == null ) || ( s.compareTo( NULL_BD ) == 0 ) ); } @@ -795,6 +703,16 @@ public final class ForesterUtil { return isReadableFile( new File( s ) ); } + public final static boolean isWindows() { + try { + return OS_NAME.toLowerCase().indexOf( "win" ) > -1; + } + catch ( final Exception e ) { + ForesterUtil.printWarningMessage( AptxConstants.PRG_NAME, "minor error: " + e ); + return false; + } + } + final public static String isWritableFile( final File f ) { if ( f.isDirectory() ) { return "[" + f + "] is a directory"; @@ -804,6 +722,10 @@ public final class ForesterUtil { } return null; } + + final public static String isWritableFile( final String s ) { + return isWritableFile( new File( s ) ); + } /** * Helper for method "stringToColor". @@ -820,7 +742,7 @@ public final class ForesterUtil { return i; } - final public static SortedMap listToSortedCountsMap( final List list ) { + final public static SortedMap listToSortedCountsMap( final List list ) { final SortedMap map = new TreeMap(); for( final Object key : list ) { if ( !map.containsKey( key ) ) { @@ -833,10 +755,39 @@ public final class ForesterUtil { return map; } - final public static StringBuffer mapToStringBuffer( final Map map, final String key_value_separator ) { + final public static void map2file( final File file, + final Map data, + final String entry_separator, + final String data_separator ) + throws IOException { + final Writer writer = new BufferedWriter( new FileWriter( file ) ); + map2writer( writer, data, entry_separator, data_separator ); + writer.close(); + } + + final public static void map2writer( final Writer writer, + final Map data, + final String entry_separator, + final String data_separator ) + throws IOException { + boolean first = true; + for( final Entry entry : data.entrySet() ) { + if ( !first ) { + writer.write( data_separator ); + } + else { + first = false; + } + writer.write( entry.getKey().toString() ); + writer.write( entry_separator ); + writer.write( entry.getValue().toString() ); + } + } + + final public static StringBuffer mapToStringBuffer( final Map map, + final String key_value_separator ) { final StringBuffer sb = new StringBuffer(); - for( final Iterator iter = map.keySet().iterator(); iter.hasNext(); ) { - final Object key = iter.next(); + for( final Object key : map.keySet() ) { sb.append( key.toString() ); sb.append( key_value_separator ); sb.append( map.get( key ).toString() ); @@ -866,6 +817,194 @@ public final class ForesterUtil { } } + public final static Color obtainColorDependingOnTaxonomyGroup( final String tax_group ) { + if ( !ForesterUtil.isEmpty( tax_group ) ) { + if ( tax_group.equals( TaxonomyGroups.DEUTEROSTOMIA ) ) { + return TaxonomyColors.DEUTEROSTOMIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.PROTOSTOMIA ) ) { + return TaxonomyColors.PROTOSTOMIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CNIDARIA ) ) { + return TaxonomyColors.CNIDARIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.PLACOZOA ) ) { + return TaxonomyColors.PLACOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CTENOPHORA ) ) { + return TaxonomyColors.CTENOPHORA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.PORIFERA ) ) { + return TaxonomyColors.PORIFERA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { + return TaxonomyColors.CHOANOFLAGELLIDA; + } + else if ( tax_group.equals( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) ) { + return TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA; + } + else if ( tax_group.equals( TaxonomyGroups.DIKARYA ) ) { + return TaxonomyColors.DIKARYA_COLOR; + } + else if ( tax_group.equalsIgnoreCase( TaxonomyGroups.FUNGI ) + || tax_group.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { + return TaxonomyColors.OTHER_FUNGI_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP ) ) { + return TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.AMOEBOZOA ) ) { + return TaxonomyColors.AMOEBOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.EMBRYOPHYTA ) ) { + return TaxonomyColors.EMBRYOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CHLOROPHYTA ) ) { + return TaxonomyColors.CHLOROPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.RHODOPHYTA ) ) { + return TaxonomyColors.RHODOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.HACROBIA ) ) { + return TaxonomyColors.HACROBIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) ) { + return TaxonomyColors.GLAUCOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.STRAMENOPILES ) ) { + return TaxonomyColors.STRAMENOPILES_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.ALVEOLATA ) ) { + return TaxonomyColors.ALVEOLATA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.RHIZARIA ) ) { + return TaxonomyColors.RHIZARIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.EXCAVATA ) ) { + return TaxonomyColors.EXCAVATA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.APUSOZOA ) ) { + return TaxonomyColors.APUSOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.ARCHAEA ) ) { + return TaxonomyColors.ARCHAEA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.BACTERIA ) ) { + return TaxonomyColors.BACTERIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.VIRUSES ) ) { + return TaxonomyColors.VIRUSES_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.ALPHAHERPESVIRINAE ) ) { + return TaxonomyColors.ALPHAHERPESVIRINAE_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.BETAHERPESVIRINAE ) ) { + return TaxonomyColors.BETAHERPESVIRINAE_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.GAMMAHERPESVIRINAE ) ) { + return TaxonomyColors.GAMMAHERPESVIRINAE_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.OTHER ) ) { + return TaxonomyColors.OTHER_COLOR; + } + } + return null; + } + + public final static String obtainNormalizedTaxonomyGroup( final String tax ) { + if ( tax.equalsIgnoreCase( TaxonomyGroups.DEUTEROSTOMIA ) ) { + return TaxonomyGroups.DEUTEROSTOMIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.PROTOSTOMIA ) ) { + return TaxonomyGroups.PROTOSTOMIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.CNIDARIA ) ) { + return TaxonomyGroups.CNIDARIA; + } + else if ( tax.toLowerCase().startsWith( "trichoplax" ) || tax.equalsIgnoreCase( TaxonomyGroups.PLACOZOA ) ) { + return TaxonomyGroups.PLACOZOA; + } + else if ( tax.toLowerCase().startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( TaxonomyGroups.CTENOPHORA ) ) { + return TaxonomyGroups.CTENOPHORA; + } + else if ( tax.toLowerCase().startsWith( "amphimedon" ) || tax.equalsIgnoreCase( TaxonomyGroups.PORIFERA ) ) { + return TaxonomyGroups.PORIFERA; + } + else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { + return TaxonomyGroups.CHOANOFLAGELLIDA; + } + else if ( tax.toLowerCase().startsWith( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) + || tax.toLowerCase().startsWith( "ichthyophonida and filasterea" ) + || tax.toLowerCase().startsWith( "ichthyosporea & filasterea" ) + || tax.toLowerCase().startsWith( "ichthyosporea and filasterea" ) ) { + return TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.DIKARYA ) ) { + return TaxonomyGroups.DIKARYA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { + return TaxonomyGroups.OTHER_FUNGI; + } + else if ( tax.toLowerCase().startsWith( "nucleariidae and fonticula" ) ) { + return TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.AMOEBOZOA ) ) { + return TaxonomyGroups.AMOEBOZOA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.EMBRYOPHYTA ) ) { + return TaxonomyGroups.EMBRYOPHYTA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.CHLOROPHYTA ) ) { + return TaxonomyGroups.CHLOROPHYTA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHODOPHYTA ) ) { + return TaxonomyGroups.RHODOPHYTA; + } + else if ( tax.toLowerCase().startsWith( TaxonomyGroups.HACROBIA ) ) { + return TaxonomyGroups.HACROBIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { + return TaxonomyGroups.GLAUCOCYSTOPHYCEAE; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.STRAMENOPILES ) ) { + return TaxonomyGroups.STRAMENOPILES; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALVEOLATA ) ) { + return TaxonomyGroups.ALVEOLATA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHIZARIA ) ) { + return TaxonomyGroups.RHIZARIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.EXCAVATA ) ) { + return TaxonomyGroups.EXCAVATA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.APUSOZOA ) ) { + return TaxonomyGroups.APUSOZOA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ARCHAEA ) ) { + return TaxonomyGroups.ARCHAEA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) { + return TaxonomyGroups.BACTERIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) { + return TaxonomyGroups.BACTERIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.VIRUSES ) ) { + return TaxonomyGroups.VIRUSES; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALPHAHERPESVIRINAE ) ) { + return TaxonomyGroups.ALPHAHERPESVIRINAE; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.BETAHERPESVIRINAE ) ) { + return TaxonomyGroups.BETAHERPESVIRINAE; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.GAMMAHERPESVIRINAE ) ) { + return TaxonomyGroups.GAMMAHERPESVIRINAE; + } + return null; + } + final public static BufferedReader obtainReader( final Object source ) throws IOException, FileNotFoundException { BufferedReader reader = null; if ( source instanceof File ) { @@ -897,11 +1036,26 @@ public final class ForesterUtil { return reader; } - final public static StringBuffer pad( final double number, final int size, final char pad, final boolean left_pad ) { + public final static void outOfMemoryError( final OutOfMemoryError e ) { + System.err.println(); + System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" ); + System.err.println(); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); + } + + final public static StringBuffer pad( final double number, + final int size, + final char pad, + final boolean left_pad ) { return pad( new StringBuffer( number + "" ), size, pad, left_pad ); } - final public static StringBuffer pad( final String string, final int size, final char pad, final boolean left_pad ) { + final public static StringBuffer pad( final String string, + final int size, + final char pad, + final boolean left_pad ) { return pad( new StringBuffer( string ), size, pad, left_pad ); } @@ -939,16 +1093,6 @@ public final class ForesterUtil { return Integer.parseInt( str ); } - final public static void postOrderRelabelInternalNodes( final Phylogeny phylogeny, final int starting_number ) { - int i = starting_number; - for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) { - final PhylogenyNode node = it.next(); - if ( !node.isExternal() ) { - node.setName( String.valueOf( i++ ) ); - } - } - } - final public static void printArray( final Object[] a ) { for( int i = 0; i < a.length; ++i ) { System.out.println( "[" + i + "]=" + a[ i ] ); @@ -962,10 +1106,12 @@ public final class ForesterUtil { } final public static void printErrorMessage( final String prg_name, final String message ) { - System.out.println( "[" + prg_name + "] > error: " + message ); + System.err.println( "[" + prg_name + "] > error: " + message ); } - final public static void printProgramInformation( final String prg_name, final String prg_version, final String date ) { + final public static void printProgramInformation( final String prg_name, + final String prg_version, + final String date ) { final int l = prg_name.length() + prg_version.length() + date.length() + 4; System.out.println(); System.out.println( prg_name + " " + prg_version + " (" + date + ")" ); @@ -980,19 +1126,37 @@ public final class ForesterUtil { final String date, final String email, final String www ) { - final int l = prg_name.length() + prg_version.length() + date.length() + 4; + printProgramInformation( prg_name, null, prg_version, date, email, www, null ); + } + + final public static void printProgramInformation( final String prg_name, + final String desc, + final String prg_version, + final String date, + final String email, + final String www, + final String based_on ) { + String my_prg_name = new String( prg_name ); + if ( !ForesterUtil.isEmpty( desc ) ) { + my_prg_name += ( " - " + desc ); + } + final int l = my_prg_name.length() + prg_version.length() + date.length() + 4; System.out.println(); - System.out.println( prg_name + " " + prg_version + " (" + date + ")" ); + System.out.println( my_prg_name + " " + prg_version + " (" + date + ")" ); for( int i = 0; i < l; ++i ) { System.out.print( "_" ); } System.out.println(); System.out.println(); - System.out.println( "WWW : " + www ); - System.out.println( "Contact: " + email ); + System.out.println( "WWW : " + www ); + System.out.println( "Contact : " + email ); + if ( !ForesterUtil.isEmpty( based_on ) ) { + System.out.println( "Based on: " + based_on ); + } if ( !ForesterUtil.isEmpty( ForesterUtil.JAVA_VERSION ) && !ForesterUtil.isEmpty( ForesterUtil.JAVA_VENDOR ) ) { System.out.println(); - System.out.println( "[running on Java " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" ); + System.out + .println( "[running on Java " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" ); } System.out.println(); } @@ -1005,13 +1169,69 @@ public final class ForesterUtil { System.out.println( "[" + prg_name + "] > " + message ); } - public final static Phylogeny[] readPhylogenies( final PhylogenyParser parser, final File file ) throws IOException { - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - final Phylogeny[] trees = factory.create( file, parser ); - if ( ( trees == null ) || ( trees.length == 0 ) ) { - throw new PhylogenyParserException( "Unable to parse phylogeny from file: " + file ); + public static List readUrl( final String url_str ) throws IOException { + final URL url = new URL( url_str ); + final URLConnection urlc = url.openConnection(); + //urlc.setRequestProperty( "User-Agent", "" ); + final BufferedReader in = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) ); + String line; + final List result = new ArrayList(); + while ( ( line = in.readLine() ) != null ) { + result.add( line ); } - return trees; + in.close(); + return result; + } + + /** + * + * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => + * domain with 0.3 is ignored + * + * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored + * + * + * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ + * ignored + * + * @param max_allowed_overlap + * maximal allowed overlap (inclusive) to be still considered not + * overlapping (zero or negative value to allow any overlap) + * @param remove_engulfed_domains + * to remove domains which are completely engulfed by coverage of + * domains with better support + * @param protein + * @return + */ + public static Protein removeOverlappingDomains( final int max_allowed_overlap, + final boolean remove_engulfed_domains, + final Protein protein ) { + final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), + protein.getSpecies().getSpeciesId(), + protein.getLength() ); + final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); + final List covered_positions = new ArrayList(); + for( final Domain domain : sorted ) { + if ( ( ( max_allowed_overlap < 0 ) + || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) + && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { + final int covered_positions_size = covered_positions.size(); + for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { + covered_positions.add( false ); + } + final int new_covered_positions_size = covered_positions.size(); + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( i < new_covered_positions_size ) { + covered_positions.set( i, true ); + } + else { + covered_positions.add( true ); + } + } + pruned_protein.addProteinDomain( domain ); + } + } + return pruned_protein; } final public static String removeSuffix( final String file_name ) { @@ -1024,12 +1244,12 @@ public final class ForesterUtil { /** * Removes all white space from String s. - * + * * @return String s with white space removed */ final public static String removeWhiteSpace( String s ) { int i; - for( i = 0; i <= s.length() - 1; i++ ) { + for( i = 0; i <= ( s.length() - 1 ); i++ ) { if ( ( s.charAt( i ) == ' ' ) || ( s.charAt( i ) == '\t' ) || ( s.charAt( i ) == '\n' ) || ( s.charAt( i ) == '\r' ) ) { s = s.substring( 0, i ) + s.substring( i + 1 ); @@ -1039,18 +1259,11 @@ public final class ForesterUtil { return s; } - final public static String replaceIllegalNhCharacters( final String nh ) { - if ( nh == null ) { - return ""; - } - return nh.trim().replaceAll( "[\\[\\]:]+", "_" ); - } - final public static String replaceIllegalNhxCharacters( final String nhx ) { if ( nhx == null ) { return ""; } - return nhx.trim().replaceAll( "[\\[\\](),:;\\s]+", "_" ); + return nhx.trim().replaceAll( "[\\[\\]']+", "_" ); } final public static double round( final double value, final int decimal_place ) { @@ -1083,15 +1296,59 @@ public final class ForesterUtil { } } + public final static StringBuilder santitizeStringForNH( String data ) { + data = data.replaceAll( "\\s+", " " ).trim(); + final StringBuilder sb = new StringBuilder(); + if ( data.length() > 0 ) { + final boolean single_pars = data.indexOf( '\'' ) > -1; + final boolean double_pars = data.indexOf( '"' ) > -1; + if ( single_pars && double_pars ) { + data = data.replace( '\'', '`' ); + sb.append( '\'' ); + sb.append( data ); + sb.append( '\'' ); + } + else if ( single_pars ) { + sb.append( '"' ); + sb.append( data ); + sb.append( '"' ); + } + else if ( PARANTHESESABLE_NH_CHARS_PATTERN.matcher( data ).find() ) { + sb.append( '\'' ); + sb.append( data ); + sb.append( '\'' ); + } + else { + sb.append( data ); + } + } + return sb; + } + + public static boolean seqIsLikelyToBeAa( final String s ) { + final String seq = s.toLowerCase(); + if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) + || ( seq.indexOf( 'q' ) > -1 ) || ( seq.indexOf( 'h' ) > -1 ) || ( seq.indexOf( 'k' ) > -1 ) + || ( seq.indexOf( 'w' ) > -1 ) || ( seq.indexOf( 's' ) > -1 ) || ( seq.indexOf( 'm' ) > -1 ) + || ( seq.indexOf( 'p' ) > -1 ) || ( seq.indexOf( 'v' ) > -1 ) ) { + return true; + } + return false; + } + final private static String[] splitString( final String str ) { final String regex = "[\\s;,]+"; return str.split( regex ); } + final public static String stringArrayToString( final String[] a ) { + return stringArrayToString( a, ", " ); + } + final public static String stringArrayToString( final String[] a, final String separator ) { final StringBuilder sb = new StringBuilder(); if ( ( a != null ) && ( a.length > 0 ) ) { - for( int i = 0; i < a.length - 1; ++i ) { + for( int i = 0; i < ( a.length - 1 ); ++i ) { sb.append( a[ i ] + separator ); } sb.append( a[ a.length - 1 ] ); @@ -1099,10 +1356,22 @@ public final class ForesterUtil { return sb.toString(); } + final public static String[] stringListToArray( final List list ) { + if ( list != null ) { + final String[] str = new String[ list.size() ]; + int i = 0; + for( final String l : list ) { + str[ i++ ] = l; + } + return str; + } + return null; + } + final public static String stringListToString( final List l, final String separator ) { final StringBuilder sb = new StringBuilder(); if ( ( l != null ) && ( l.size() > 0 ) ) { - for( int i = 0; i < l.size() - 1; ++i ) { + for( int i = 0; i < ( l.size() - 1 ); ++i ) { sb.append( l.get( i ) + separator ); } sb.append( l.get( l.size() - 1 ) ); @@ -1110,10 +1379,6 @@ public final class ForesterUtil { return sb.toString(); } - final public static String stringArrayToString( final String[] a ) { - return stringArrayToString( a, ", " ); - } - final public static String[] stringSetToArray( final Set strings ) { final String[] str_array = new String[ strings.size() ]; int i = 0; @@ -1123,139 +1388,34 @@ public final class ForesterUtil { return str_array; } - final static public void transferInternalNamesToBootstrapSupport( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isExternal() && !ForesterUtil.isEmpty( n.getName() ) ) { - double value = -1; - try { - value = Double.parseDouble( n.getName() ); - } - catch ( final NumberFormatException e ) { - throw new IllegalArgumentException( "failed to parse number from [" + n.getName() + "]: " - + e.getLocalizedMessage() ); - } - if ( value >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( value, "bootstrap" ) ); - n.setName( "" ); - } - } - } + final public static void unexpectedFatalError( final Error e ) { + System.err.println(); + System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); } - final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - if ( !n.isRoot() && !n.isExternal() && !n.getBranchData().isHasConfidences() ) { - if ( !ForesterUtil.isEmpty( n.getName() ) ) { - double d = -1.0; - try { - d = Double.parseDouble( n.getName() ); - } - catch ( final Exception e ) { - d = -1.0; - } - if ( d >= 0.0 ) { - n.getBranchData().addConfidence( new Confidence( d, "" ) ); - n.setName( "" ); - } - } - } - } + final public static void unexpectedFatalError( final Exception e ) { + System.err.println(); + System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." ); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); } - final static public void transferNodeNameToField( final Phylogeny phy, final PhylogenyNodeField field ) { - final PhylogenyNodeIterator it = phy.iteratorPostorder(); - while ( it.hasNext() ) { - final PhylogenyNode n = it.next(); - final String name = n.getName().trim(); - if ( !ForesterUtil.isEmpty( name ) ) { - switch ( field ) { - case TAXONOMY_CODE: - //temp hack - // if ( name.length() > 5 ) { - // n.setName( "" ); - // if ( !n.getNodeData().isHasTaxonomy() ) { - // n.getNodeData().setTaxonomy( new Taxonomy() ); - // } - // n.getNodeData().getTaxonomy().setScientificName( name ); - // break; - // } - // - n.setName( "" ); - PhylogenyMethods.setTaxonomyCode( n, name ); - break; - case TAXONOMY_SCIENTIFIC_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setScientificName( name ); - break; - case TAXONOMY_COMMON_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - n.getNodeData().getTaxonomy().setCommonName( name ); - break; - case SEQUENCE_SYMBOL: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setSymbol( name ); - break; - case SEQUENCE_NAME: - n.setName( "" ); - if ( !n.getNodeData().isHasSequence() ) { - n.getNodeData().setSequence( new Sequence() ); - } - n.getNodeData().getSequence().setName( name ); - break; - case TAXONOMY_ID_UNIPROT_1: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( 0, i ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - case TAXONOMY_ID_UNIPROT_2: { - if ( !n.getNodeData().isHasTaxonomy() ) { - n.getNodeData().setTaxonomy( new Taxonomy() ); - } - String id = name; - final int i = name.indexOf( '_' ); - if ( i > 0 ) { - id = name.substring( i + 1, name.length() ); - } - else { - n.setName( "" ); - } - n.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( id, PhyloXmlUtil.UNIPROT_TAX_PROVIDER ) ); - break; - } - } - } - } + final public static void unexpectedFatalError( final String message ) { + System.err.println(); + System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); + System.err.println( message ); + System.err.println(); + System.exit( -1 ); } final public static void unexpectedFatalError( final String prg_name, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error (Should not have occured! Please contact program author(s).)" ); + + "] > unexpected error; should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); @@ -1264,7 +1424,7 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error. Should not have occured! Please contact program author(s)." ); + + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); System.err.println(); System.exit( -1 ); @@ -1273,13 +1433,30 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error. Should not have occured! Please contact program author(s)." ); + + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } + public final static void updateProgress( final double progress_percentage ) { + final int width = 50; + System.out.print( "\r[" ); + int i = 0; + for( ; i <= ForesterUtil.roundToInt( progress_percentage * width ); i++ ) { + System.out.print( "." ); + } + for( ; i < width; i++ ) { + System.out.print( " " ); + } + System.out.print( "]" ); + } + + public final static void updateProgress( final int i, final DecimalFormat f ) { + System.out.print( "\r[" + f.format( i ) + "]" ); + } + public final static String wordWrap( final String str, final int width ) { final StringBuilder sb = new StringBuilder( str ); int start = 0; @@ -1293,7 +1470,7 @@ public final class ForesterUtil { ls = -1; start = i + 1; } - if ( i > start + width - 1 ) { + if ( i > ( ( start + width ) - 1 ) ) { if ( ls != -1 ) { sb.setCharAt( ls, '\n' ); start = ls + 1; @@ -1309,18 +1486,208 @@ public final class ForesterUtil { return sb.toString(); } - public static enum PhylogenyNodeField { - CLADE_NAME, - TAXONOMY_CODE, - TAXONOMY_SCIENTIFIC_NAME, - TAXONOMY_COMMON_NAME, - SEQUENCE_SYMBOL, - SEQUENCE_NAME, - TAXONOMY_ID_UNIPROT_1, - TAXONOMY_ID_UNIPROT_2; + public final static Phylogeny[] readPhylogeniesFromUrl( final URL url, final PhylogenyParser parser ) + throws NoSuchAlgorithmException, IOException, KeyManagementException { + if ( url == null ) { + throw new IllegalArgumentException( "URL to read from must not be null" ); + } + else if ( parser == null ) { + throw new IllegalArgumentException( "parser to use to read from URL must not be null" ); + } + final URLConnection con; + if ( url.toString().startsWith( "https:" ) ) { + con = TrustManager.makeHttpsURLConnection( url ); + } + else if ( url.toString().startsWith( "http:" ) ) { + con = url.openConnection(); + } + else { + throw new IllegalArgumentException( "Cannot deal with URL: " + url ); + } + if ( con == null ) { + throw new IOException( "could not create connection from " + url ); + } + con.setDefaultUseCaches( false ); + final InputStream is = con.getInputStream(); + if ( is == null ) { + throw new IOException( "could not create input stream from " + url ); + } + final Phylogeny[] trees = ParserBasedPhylogenyFactory.getInstance().create( is, parser ); + try { + is.close(); + } + catch ( final Exception e ) { + // ignore + } + return trees; } - public static enum TAXONOMY_EXTRACTION { - NO, YES, PFAM_STYLE_ONLY; + public final static File getMatchingFile( final File dir, final String prefix, final String suffix ) + throws IOException { + if ( !dir.exists() ) { + throw new IOException( "[" + dir + "] does not exist" ); + } + if ( !dir.isDirectory() ) { + throw new IOException( "[" + dir + "] is not a directory" ); + } + if ( dir.listFiles().length == 0 ) { + throw new IOException( "[" + dir + "] is empty" ); + } + final File files[] = dir.listFiles( new FilenameFilter() { + + @Override + public boolean accept( final File dir, final String name ) { + return ( name.endsWith( suffix ) ); + } + } ); + if ( files.length == 0 ) { + throw new IOException( "no files ending with \"" + suffix + "\" found in [" + dir + "]" ); + } + String my_prefix = prefix; + boolean done = false; + boolean more_than_one = false; + File the_one = null; + do { + int matches = 0; + for( File file : files ) { + if ( file.getName().startsWith( my_prefix ) ) { + matches++; + if ( matches > 1 ) { + the_one = null; + break; + } + the_one = file; + } + } + if ( matches > 1 ) { + more_than_one = true; + done = true; + } + if ( matches == 1 ) { + done = true; + } + else { + if ( my_prefix.length() <= 1 ) { + throw new IOException( "no file matching \"" + removeFileExtension( prefix ) + + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" ); + } + my_prefix = my_prefix.substring( 0, my_prefix.length() - 1 ); + } + } while ( !done ); + if ( more_than_one ) { + throw new IOException( "multiple files matching \"" + removeFileExtension( prefix ) + + "\" and ending with \"" + suffix + "\" found in [" + dir + "]" ); + } + else if ( the_one != null ) { + } + else { + throw new IOException( "no file matching \"" + removeFileExtension( prefix ) + "\" and ending with \"" + + suffix + "\" found in [" + dir + "]" ); + } + return the_one; + } + + public final static String greatestCommonPrefix( final String a, final String b ) { + final int min_length = Math.min( a.length(), b.length() ); + for( int i = 0; i < min_length; ++i ) { + if ( a.charAt( i ) != b.charAt( i ) ) { + return a.substring( 0, i ); + } + } + return a.substring( 0, min_length ); + } + + public final static String greatestCommonPrefix( final String a, final String b, final String separator ) { + if ( ForesterUtil.isEmpty( separator ) ) { + throw new IllegalArgumentException( "separator must not be null or empty" ); + } + final String[] as = a.split( Pattern.quote( separator ) ); + final String[] bs = b.split( Pattern.quote( separator ) ); + final int min_length = Math.min( as.length, bs.length ); + for( int i = 0; i < min_length; ++i ) { + if ( !( as[ i ].equals( bs[ i ] ) ) ) { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for( int j = 0; j < i; ++j ) { + if ( first ) { + first = false; + } + else { + sb.append( separator ); + } + sb.append( as[ j ] ); + } + return sb.toString(); + } + } + StringBuilder sb = new StringBuilder(); + boolean first = true; + for( int j = 0; j < min_length; ++j ) { + if ( first ) { + first = false; + } + else { + sb.append( separator ); + } + sb.append( as[ j ] ); + } + return sb.toString(); + } + + public final static String greatestCommonPrefix( final List strings ) { + if ( strings == null ) { + throw new IllegalArgumentException( "list of strings is null" ); + } + if ( strings.isEmpty() ) { + throw new IllegalArgumentException( "list of strings is empty" ); + } + String common = strings.get( 0 ); + for( int i = 1; i < strings.size(); ++i ) { + common = greatestCommonPrefix( common, strings.get( i ) ); + } + return common; + } + + public final static String greatestCommonPrefix( final List strings, final String separator ) { + if ( ForesterUtil.isEmpty( separator ) ) { + return greatestCommonPrefix( strings ); + } + if ( strings == null ) { + throw new IllegalArgumentException( "list of strings is null" ); + } + if ( strings.isEmpty() ) { + throw new IllegalArgumentException( "list of strings is empty" ); + } + String common = strings.get( 0 ); + for( int i = 1; i < strings.size(); ++i ) { + common = greatestCommonPrefix( common, strings.get( i ), separator ); + } + return common; + } + + private ForesterUtil() { + } + + public static List spliIntoPrefixes( final String prefix, final String separator ) { + final String[] a = prefix.split( Pattern.quote( separator ) ); + final List l = new ArrayList(); + for( int i = 0; i < a.length; ++i ) { + final StringBuilder sb = new StringBuilder(); + for( int j = 0; j <= i; ++j ) { + sb.append( a[ j ] ); + if ( j < i ) { + sb.append( separator ); + } + } + // System.out.println( sb.toString() ); + l.add( sb.toString() ); + } + return l; + } + + // + public static boolean isLooksLikeFasta( final File file ) throws IOException { + final String first_line = ForesterUtil.getFirstLine( file ).trim().toLowerCase(); + return ( ( !isEmptyTrimmed( first_line ) && first_line.trim().startsWith( ">" ) ) ); } }