X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Futil%2FForesterUtil.java;h=b3eb0f218612e582556885a34eb7a6765930bd1c;hb=df8e9950662eaab9427f6873dcd0072f0d28f690;hp=8036c2d8342d5ea79b84b35ba12ee8947262a2c1;hpb=0fc3bc32fc5be907e3f91a780af68c6baff79db1;p=jalview.git diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 8036c2d..b3eb0f2 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -41,6 +41,7 @@ import java.io.StringReader; import java.io.Writer; import java.math.BigDecimal; import java.net.URL; +import java.net.URLConnection; import java.text.DateFormat; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; @@ -50,7 +51,6 @@ import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Date; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -70,46 +70,31 @@ import org.forester.phylogeny.data.Taxonomy; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; import org.forester.protein.Protein; +import org.forester.sequence.MolecularSequence; +import org.forester.sequence.MolecularSequence.TYPE; import org.forester.surfacing.SurfacingUtil; public final class ForesterUtil { public final static String FILE_SEPARATOR = System.getProperty( "file.separator" ); - public final static String LINE_SEPARATOR = System.getProperty( "line.separator" ); + public static final NumberFormat FORMATTER_06; + public static final NumberFormat FORMATTER_3; + public static final NumberFormat FORMATTER_6; + public static final NumberFormat FORMATTER_9; public final static String JAVA_VENDOR = System.getProperty( "java.vendor" ); public final static String JAVA_VERSION = System.getProperty( "java.version" ); + public final static String LINE_SEPARATOR = System.getProperty( "line.separator" ); + public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; + public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; + public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; + public static final BigDecimal NULL_BD = new BigDecimal( 0 ); public final static String OS_ARCH = System.getProperty( "os.arch" ); public final static String OS_NAME = System.getProperty( "os.name" ); public final static String OS_VERSION = System.getProperty( "os.version" ); - public final static Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s]" ); - public final static double ZERO_DIFF = 1.0E-9; - public static final BigDecimal NULL_BD = new BigDecimal( 0 ); - public static final NumberFormat FORMATTER_9; - public static final NumberFormat FORMATTER_6; - public static final NumberFormat FORMATTER_06; - public static final NumberFormat FORMATTER_3; - public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; - public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; + public static final String PDB = "http://www.pdb.org/pdb/explore/explore.do?pdbId="; public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; - public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; - public final static Color DEUTEROSTOMIA_COLOR = new Color( 255, 0, 0 ); - public final static Color PROTOSTOMIA_COLOR = new Color( 204, 0, 0 ); - public final static Color METAZOA_COLOR = new Color( 204, 0, 102 ); - public final static Color HOLOZOA_COLOR = new Color( 127, 0, 255 ); - public final static Color FUNGI_COLOR = new Color( 255, 153, 0 ); - public final static Color HOLOMYCOTA_COLOR = new Color( 204, 102, 0 ); - public final static Color AMOEBOZOA_COLOR = new Color( 255, 0, 255 ); - public final static Color VIRIDPLANTAE_COLOR = new Color( 0, 255, 0 ); - public final static Color RHODOPHYTA_COLOR = new Color( 0, 153, 76 ); - public final static Color HACROBIA_COLOR = new Color( 0, 102, 51 ); - public final static Color GLAUCOPHYTA_COLOR = new Color( 0, 102, 51 ); - public final static Color STRAMENOPILES_COLOR = new Color( 0, 0, 255 ); - public final static Color ALVEOLATA_COLOR = new Color( 0, 128, 255 ); - public final static Color RHIZARIA_COLOR = new Color( 0, 255, 255 ); - public static final Color APUSOZOA_COLOR = new Color( 204, 255, 255 ); - public final static Color EXCAVATA_COLOR = new Color( 204, 204, 0 ); - public final static Color ARCHAEA_COLOR = new Color( 160, 160, 160 ); - public final static Color BACTERIA_COLOR = new Color( 64, 64, 64 ); + public final static double ZERO_DIFF = 1.0E-9; + private static final Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]]" ); static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); dfs.setDecimalSeparator( '.' ); @@ -120,19 +105,6 @@ public final class ForesterUtil { FORMATTER_3 = new DecimalFormat( "#.###", dfs ); } - private ForesterUtil() { - } - - public static int calculateOverlap( final Domain domain, final List covered_positions ) { - int overlap_count = 0; - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { - ++overlap_count; - } - } - return overlap_count; - } - final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { if ( sb.length() > 0 ) { sb.append( separator ); @@ -140,82 +112,16 @@ public final class ForesterUtil { } /** - * - * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => - * domain with 0.3 is ignored - * - * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored - * - * - * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ - * ignored - * - * @param max_allowed_overlap - * maximal allowed overlap (inclusive) to be still considered not - * overlapping (zero or negative value to allow any overlap) - * @param remove_engulfed_domains - * to remove domains which are completely engulfed by coverage of - * domains with better support - * @param protein - * @return - */ - public static Protein removeOverlappingDomains( final int max_allowed_overlap, - final boolean remove_engulfed_domains, - final Protein protein ) { - final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies() - .getSpeciesId(), protein.getLength() ); - final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); - final List covered_positions = new ArrayList(); - for( final Domain domain : sorted ) { - if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) - && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { - final int covered_positions_size = covered_positions.size(); - for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { - covered_positions.add( false ); - } - final int new_covered_positions_size = covered_positions.size(); - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( i < new_covered_positions_size ) { - covered_positions.set( i, true ); - } - else { - covered_positions.add( true ); - } - } - pruned_protein.addProteinDomain( domain ); - } - } - return pruned_protein; - } - - /** - * Returns true is Domain domain falls in an uninterrupted stretch of - * covered positions. - * - * @param domain - * @param covered_positions - * @return - */ - public static boolean isEngulfed( final Domain domain, final List covered_positions ) { - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { - return false; - } - } - return true; - } - - /** * This calculates a color. If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, * otherwise a color 'proportional' to value is returned. - * + * * @param value - * the value + * the value * @param min - * the smallest value + * the smallest value * @param max - * the largest value + * the largest value * @param minColor * the color for min * @param maxColor @@ -246,15 +152,15 @@ public final class ForesterUtil { * value is equal to mean the returned color is meanColor, otherwise a color * 'proportional' to value is returned -- either between min-mean or * mean-max - * + * * @param value * the value * @param min * the smallest value * @param max - * the largest value + * the largest value * @param mean - * the mean/median value + * the mean/median value * @param minColor * the color for min * @param maxColor @@ -295,6 +201,49 @@ public final class ForesterUtil { } } + /** + * Helper method for calcColor methods. + * + * @param smallercolor_component_x + * color component the smaller color + * @param largercolor_component_x + * color component the larger color + * @param x + * factor + * @return an int representing a color component + */ + final private static int calculateColorComponent( final double smallercolor_component_x, + final double largercolor_component_x, + final double x ) { + return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); + } + + /** + * Helper method for calcColor methods. + * + * + * @param value + * the value + * @param larger + * the largest value + * @param smaller + * the smallest value + * @return a normalized value between larger and smaller + */ + final private static double calculateColorFactor( final double value, final double larger, final double smaller ) { + return ( 255.0 * ( value - smaller ) ) / ( larger - smaller ); + } + + public static int calculateOverlap( final Domain domain, final List covered_positions ) { + int overlap_count = 0; + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { + ++overlap_count; + } + } + return overlap_count; + } + final public static String collapseWhiteSpace( final String s ) { return s.replaceAll( "[\\s]+", " " ); } @@ -443,16 +392,6 @@ public final class ForesterUtil { } } - public static String[] file2array( final File file ) throws IOException { - final List list = file2list( file ); - final String[] ary = new String[ list.size() ]; - int i = 0; - for( final String s : list ) { - ary[ i++ ] = s; - } - return ary; - } - public static String[][] file22dArray( final File file ) throws IOException { final List list = new ArrayList(); final BufferedReader in = new BufferedReader( new FileReader( file ) ); @@ -481,6 +420,16 @@ public final class ForesterUtil { return ary; } + public static String[] file2array( final File file ) throws IOException { + final List list = file2list( file ); + final String[] ary = new String[ list.size() ]; + int i = 0; + for( final String s : list ) { + ary[ i++ ] = s; + } + return ary; + } + final public static List file2list( final File file ) throws IOException { final List list = new ArrayList(); final BufferedReader in = new BufferedReader( new FileReader( file ) ); @@ -547,7 +496,9 @@ public final class ForesterUtil { reader = new BufferedReader( new StringReader( source.toString() ) ); } else if ( source instanceof URL ) { - reader = new BufferedReader( new InputStreamReader( ( ( URL ) source ).openStream() ) ); + final URLConnection url_connection = ( ( URL ) source ).openConnection(); + url_connection.setDefaultUseCaches( false ); + reader = new BufferedReader( new InputStreamReader( url_connection.getInputStream() ) ); } else { throw new IllegalArgumentException( "dont know how to read [" + source.getClass() + "]" ); @@ -576,6 +527,22 @@ public final class ForesterUtil { return ForesterUtil.LINE_SEPARATOR; } + final public static MolecularSequence.TYPE guessMolecularSequenceType( final String mol_seq ) { + if ( mol_seq.contains( "L" ) || mol_seq.contains( "I" ) || mol_seq.contains( "E" ) || mol_seq.contains( "H" ) + || mol_seq.contains( "D" ) || mol_seq.contains( "Q" ) ) { + return TYPE.AA; + } + else { + if ( mol_seq.contains( "T" ) ) { + return TYPE.DNA; + } + else if ( mol_seq.contains( "U" ) ) { + return TYPE.RNA; + } + } + return null; + } + final public static void increaseCountingMap( final Map counting_map, final String item_name ) { if ( !counting_map.containsKey( item_name ) ) { counting_map.put( item_name, 1 ); @@ -585,10 +552,6 @@ public final class ForesterUtil { } } - final public static boolean isContainsParanthesesableNhCharacter( final String nh ) { - return PARANTHESESABLE_NH_CHARS_PATTERN.matcher( nh ).find(); - } - final public static boolean isEmpty( final List l ) { if ( ( l == null ) || l.isEmpty() ) { return true; @@ -617,6 +580,23 @@ public final class ForesterUtil { return ( ( s == null ) || ( s.length() < 1 ) ); } + /** + * Returns true is Domain domain falls in an uninterrupted stretch of + * covered positions. + * + * @param domain + * @param covered_positions + * @return + */ + public static boolean isEngulfed( final Domain domain, final List covered_positions ) { + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { + return false; + } + } + return true; + } + final public static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < ZERO_DIFF ); } @@ -629,7 +609,7 @@ public final class ForesterUtil { * This determines whether String[] a and String[] b have at least one * String in common (intersect). Returns false if at least one String[] is * null or empty. - * + * * @param a * a String[] b a String[] * @return true if both a and b or not empty or null and contain at least @@ -661,6 +641,16 @@ public final class ForesterUtil { } } + public final static boolean isMac() { + try { + return OS_NAME.toLowerCase().startsWith( "mac" ); + } + catch ( final Exception e ) { + ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e ); + return false; + } + } + final public static boolean isNull( final BigDecimal s ) { return ( ( s == null ) || ( s.compareTo( NULL_BD ) == 0 ) ); } @@ -698,16 +688,6 @@ public final class ForesterUtil { } } - public final static boolean isMac() { - try { - return OS_NAME.toLowerCase().startsWith( "mac" ); - } - catch ( final Exception e ) { - ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e ); - return false; - } - } - final public static String isWritableFile( final File f ) { if ( f.isDirectory() ) { return "[" + f + "] is a directory"; @@ -733,7 +713,7 @@ public final class ForesterUtil { return i; } - final public static SortedMap listToSortedCountsMap( final List list ) { + final public static SortedMap listToSortedCountsMap( final List list ) { final SortedMap map = new TreeMap(); for( final Object key : list ) { if ( !map.containsKey( key ) ) { @@ -773,10 +753,9 @@ public final class ForesterUtil { } } - final public static StringBuffer mapToStringBuffer( final Map map, final String key_value_separator ) { + final public static StringBuffer mapToStringBuffer( final Map map, final String key_value_separator ) { final StringBuffer sb = new StringBuffer(); - for( final Iterator iter = map.keySet().iterator(); iter.hasNext(); ) { - final Object key = iter.next(); + for( final Object key : map.keySet() ) { sb.append( key.toString() ); sb.append( key_value_separator ); sb.append( map.get( key ).toString() ); @@ -806,6 +785,164 @@ public final class ForesterUtil { } } + public final static Color obtainColorDependingOnTaxonomyGroup( final String tax_group ) { + if ( !ForesterUtil.isEmpty( tax_group ) ) { + if ( tax_group.equals( TaxonomyGroups.DEUTEROSTOMIA ) ) { + return TaxonomyColors.DEUTEROSTOMIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.PROTOSTOMIA ) ) { + return TaxonomyColors.PROTOSTOMIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CNIDARIA ) ) { + return TaxonomyColors.CNIDARIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.PLACOZOA ) ) { + return TaxonomyColors.PLACOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CTENOPHORA ) ) { + return TaxonomyColors.CTENOPHORA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.PORIFERA ) ) { + return TaxonomyColors.PORIFERA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { + return TaxonomyColors.CHOANOFLAGELLIDA; + } + else if ( tax_group.equals( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) ) { + return TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA; + } + else if ( tax_group.equals( TaxonomyGroups.DIKARYA ) ) { + return TaxonomyColors.DIKARYA_COLOR; + } + else if ( tax_group.equalsIgnoreCase( TaxonomyGroups.FUNGI ) + || tax_group.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { + return TaxonomyColors.OTHER_FUNGI_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP ) ) { + return TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.AMOEBOZOA ) ) { + return TaxonomyColors.AMOEBOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.EMBRYOPHYTA ) ) { + return TaxonomyColors.EMBRYOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.CHLOROPHYTA ) ) { + return TaxonomyColors.CHLOROPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.RHODOPHYTA ) ) { + return TaxonomyColors.RHODOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.HACROBIA ) ) { + return TaxonomyColors.HACROBIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) ) { + return TaxonomyColors.GLAUCOPHYTA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.STRAMENOPILES ) ) { + return TaxonomyColors.STRAMENOPILES_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.ALVEOLATA ) ) { + return TaxonomyColors.ALVEOLATA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.RHIZARIA ) ) { + return TaxonomyColors.RHIZARIA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.EXCAVATA ) ) { + return TaxonomyColors.EXCAVATA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.APUSOZOA ) ) { + return TaxonomyColors.APUSOZOA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.ARCHAEA ) ) { + return TaxonomyColors.ARCHAEA_COLOR; + } + else if ( tax_group.equals( TaxonomyGroups.BACTERIA ) ) { + return TaxonomyColors.BACTERIA_COLOR; + } + } + return null; + } + + public final static String obtainNormalizedTaxonomyGroup( final String tax ) { + if ( tax.equalsIgnoreCase( TaxonomyGroups.DEUTEROSTOMIA ) ) { + return TaxonomyGroups.DEUTEROSTOMIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.PROTOSTOMIA ) ) { + return TaxonomyGroups.PROTOSTOMIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.CNIDARIA ) ) { + return TaxonomyGroups.CNIDARIA; + } + else if ( tax.toLowerCase().startsWith( "trichoplax" ) || tax.equalsIgnoreCase( TaxonomyGroups.PLACOZOA ) ) { + return TaxonomyGroups.PLACOZOA; + } + else if ( tax.toLowerCase().startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( TaxonomyGroups.CTENOPHORA ) ) { + return TaxonomyGroups.CTENOPHORA; + } + else if ( tax.toLowerCase().startsWith( "amphimedon" ) || tax.equalsIgnoreCase( TaxonomyGroups.PORIFERA ) ) { + return TaxonomyGroups.PORIFERA; + } + else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { + return TaxonomyGroups.CHOANOFLAGELLIDA; + } + else if ( tax.toLowerCase().startsWith( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) + || tax.toLowerCase().startsWith( "ichthyophonida and filasterea" ) + || tax.toLowerCase().startsWith( "ichthyosporea & filasterea" ) + || tax.toLowerCase().startsWith( "ichthyosporea and filasterea" ) ) { + return TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.DIKARYA ) ) { + return TaxonomyGroups.DIKARYA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { + return TaxonomyGroups.OTHER_FUNGI; + } + else if ( tax.toLowerCase().startsWith( "nucleariidae and fonticula" ) ) { + return TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.AMOEBOZOA ) ) { + return TaxonomyGroups.AMOEBOZOA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.EMBRYOPHYTA ) ) { + return TaxonomyGroups.EMBRYOPHYTA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.CHLOROPHYTA ) ) { + return TaxonomyGroups.CHLOROPHYTA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHODOPHYTA ) ) { + return TaxonomyGroups.RHODOPHYTA; + } + else if ( tax.toLowerCase().startsWith( TaxonomyGroups.HACROBIA ) ) { + return TaxonomyGroups.HACROBIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { + return TaxonomyGroups.GLAUCOCYSTOPHYCEAE; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.STRAMENOPILES ) ) { + return TaxonomyGroups.STRAMENOPILES; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALVEOLATA ) ) { + return TaxonomyGroups.ALVEOLATA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHIZARIA ) ) { + return TaxonomyGroups.RHIZARIA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.EXCAVATA ) ) { + return TaxonomyGroups.EXCAVATA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.APUSOZOA ) ) { + return TaxonomyGroups.APUSOZOA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ARCHAEA ) ) { + return TaxonomyGroups.ARCHAEA; + } + else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) { + return TaxonomyGroups.BACTERIA; + } + return null; + } + final public static BufferedReader obtainReader( final Object source ) throws IOException, FileNotFoundException { BufferedReader reader = null; if ( source instanceof File ) { @@ -832,11 +969,20 @@ public final class ForesterUtil { } else { throw new IllegalArgumentException( "attempt to parse object of type [" + source.getClass() - + "] (can only parse objects of type File, InputStream, String, or StringBuffer)" ); + + "] (can only parse objects of type File, InputStream, String, or StringBuffer)" ); } return reader; } + public final static void outOfMemoryError( final OutOfMemoryError e ) { + System.err.println(); + System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" ); + System.err.println(); + e.printStackTrace( System.err ); + System.err.println(); + System.exit( -1 ); + } + final public static StringBuffer pad( final double number, final int size, final char pad, final boolean left_pad ) { return pad( new StringBuffer( number + "" ), size, pad, left_pad ); } @@ -952,6 +1098,69 @@ public final class ForesterUtil { System.out.println( "[" + prg_name + "] > " + message ); } + public static List readUrl( final String url_str ) throws IOException { + final URL url = new URL( url_str ); + final URLConnection urlc = url.openConnection(); + //urlc.setRequestProperty( "User-Agent", "" ); + final BufferedReader in = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) ); + String line; + final List result = new ArrayList(); + while ( ( line = in.readLine() ) != null ) { + result.add( line ); + } + in.close(); + return result; + } + + /** + * + * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => + * domain with 0.3 is ignored + * + * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored + * + * + * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ + * ignored + * + * @param max_allowed_overlap + * maximal allowed overlap (inclusive) to be still considered not + * overlapping (zero or negative value to allow any overlap) + * @param remove_engulfed_domains + * to remove domains which are completely engulfed by coverage of + * domains with better support + * @param protein + * @return + */ + public static Protein removeOverlappingDomains( final int max_allowed_overlap, + final boolean remove_engulfed_domains, + final Protein protein ) { + final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies() + .getSpeciesId(), protein.getLength() ); + final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); + final List covered_positions = new ArrayList(); + for( final Domain domain : sorted ) { + if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) + && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { + final int covered_positions_size = covered_positions.size(); + for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { + covered_positions.add( false ); + } + final int new_covered_positions_size = covered_positions.size(); + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( i < new_covered_positions_size ) { + covered_positions.set( i, true ); + } + else { + covered_positions.add( true ); + } + } + pruned_protein.addProteinDomain( domain ); + } + } + return pruned_protein; + } + final public static String removeSuffix( final String file_name ) { final int i = file_name.lastIndexOf( '.' ); if ( i > 1 ) { @@ -962,7 +1171,7 @@ public final class ForesterUtil { /** * Removes all white space from String s. - * + * * @return String s with white space removed */ final public static String removeWhiteSpace( String s ) { @@ -977,18 +1186,11 @@ public final class ForesterUtil { return s; } - final public static String replaceIllegalNhCharacters( final String nh ) { - if ( nh == null ) { - return ""; - } - return nh.trim().replaceAll( "[\\[\\]:]+", "_" ); - } - final public static String replaceIllegalNhxCharacters( final String nhx ) { if ( nhx == null ) { return ""; } - return nhx.trim().replaceAll( "[\\[\\](),:;\\s]+", "_" ); + return nhx.trim().replaceAll( "[\\[\\]']+", "_" ); } final public static double round( final double value, final int decimal_place ) { @@ -1021,6 +1223,35 @@ public final class ForesterUtil { } } + public final static StringBuilder santitizeStringForNH( String data ) { + data = data.replaceAll( "\\s+", " " ).trim(); + final StringBuilder sb = new StringBuilder(); + if ( data.length() > 0 ) { + final boolean single_pars = data.indexOf( '\'' ) > -1; + final boolean double_pars = data.indexOf( '"' ) > -1; + if ( single_pars && double_pars ) { + data = data.replace( '\'', '`' ); + sb.append( '\'' ); + sb.append( data ); + sb.append( '\'' ); + } + else if ( single_pars ) { + sb.append( '"' ); + sb.append( data ); + sb.append( '"' ); + } + else if ( PARANTHESESABLE_NH_CHARS_PATTERN.matcher( data ).find() ) { + sb.append( '\'' ); + sb.append( data ); + sb.append( '\'' ); + } + else { + sb.append( data ); + } + } + return sb; + } + public static boolean seqIsLikelyToBeAa( final String s ) { final String seq = s.toLowerCase(); if ( ( seq.indexOf( 'r' ) > -1 ) || ( seq.indexOf( 'd' ) > -1 ) || ( seq.indexOf( 'e' ) > -1 ) @@ -1032,6 +1263,11 @@ public final class ForesterUtil { return false; } + final private static String[] splitString( final String str ) { + final String regex = "[\\s;,]+"; + return str.split( regex ); + } + final public static String stringArrayToString( final String[] a ) { return stringArrayToString( a, ", " ); } @@ -1079,17 +1315,17 @@ public final class ForesterUtil { return str_array; } - final public static void unexpectedFatalError( final Exception e ) { + final public static void unexpectedFatalError( final Error e ) { System.err.println(); - System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." ); + System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); } - final public static void unexpectedFatalError( final Error e ) { + final public static void unexpectedFatalError( final Exception e ) { System.err.println(); - System.err.println( "unexpected error: should not have occured! Please contact program author(s)." ); + System.err.println( "unexpected exception: should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); @@ -1106,7 +1342,7 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error; should not have occured! Please contact program author(s)." ); + + "] > unexpected error; should not have occured! Please contact program author(s)." ); e.printStackTrace( System.err ); System.err.println(); System.exit( -1 ); @@ -1115,7 +1351,7 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final String message ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error: should not have occured! Please contact program author(s)." ); + + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); System.err.println(); System.exit( -1 ); @@ -1124,7 +1360,7 @@ public final class ForesterUtil { final public static void unexpectedFatalError( final String prg_name, final String message, final Exception e ) { System.err.println(); System.err.println( "[" + prg_name - + "] > unexpected error: should not have occured! Please contact program author(s)." ); + + "] > unexpected error: should not have occured! Please contact program author(s)." ); System.err.println( message ); e.printStackTrace( System.err ); System.err.println(); @@ -1177,108 +1413,6 @@ public final class ForesterUtil { return sb.toString(); } - /** - * Helper method for calcColor methods. - * - * @param smallercolor_component_x - * color component the smaller color - * @param largercolor_component_x - * color component the larger color - * @param x - * factor - * @return an int representing a color component - */ - final private static int calculateColorComponent( final double smallercolor_component_x, - final double largercolor_component_x, - final double x ) { - return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) ); - } - - /** - * Helper method for calcColor methods. - * - * - * @param value - * the value - * @param larger - * the largest value - * @param smaller - * the smallest value - * @return a normalized value between larger and smaller - */ - final private static double calculateColorFactor( final double value, final double larger, final double smaller ) { - return ( 255.0 * ( value - smaller ) ) / ( larger - smaller ); - } - - final private static String[] splitString( final String str ) { - final String regex = "[\\s;,]+"; - return str.split( regex ); - } - - public final static void outOfMemoryError( final OutOfMemoryError e ) { - System.err.println(); - System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" ); - System.err.println(); - e.printStackTrace( System.err ); - System.err.println(); - System.exit( -1 ); - } - - public final static Color obtainColorDependingOnTaxonomyGroup( final String tax ) { - if ( tax.equalsIgnoreCase( "deuterostomia" ) ) { - return DEUTEROSTOMIA_COLOR; - } - else if ( tax.equalsIgnoreCase( "protostomia" ) ) { - return PROTOSTOMIA_COLOR; - } - else if ( tax.equalsIgnoreCase( "metazoa" ) ) { - return METAZOA_COLOR; - } - else if ( tax.equalsIgnoreCase( "holozoa" ) ) { - return HOLOZOA_COLOR; - } - else if ( tax.equalsIgnoreCase( "fungi" ) ) { - return FUNGI_COLOR; - } - else if ( tax.equalsIgnoreCase( "holomycota" ) ) { - return HOLOMYCOTA_COLOR; - } - else if ( tax.equalsIgnoreCase( "amoebozoa" ) ) { - return AMOEBOZOA_COLOR; - } - else if ( tax.equalsIgnoreCase( "viridiplantae" ) ) { - return VIRIDPLANTAE_COLOR; - } - else if ( tax.equalsIgnoreCase( "rhodophyta" ) ) { - return RHODOPHYTA_COLOR; - } - else if ( tax.toLowerCase().startsWith( "hacrobia" ) ) { - return HACROBIA_COLOR; - } - else if ( tax.equalsIgnoreCase( "glaucocystophyceae" ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { - return GLAUCOPHYTA_COLOR; - } - else if ( tax.equalsIgnoreCase( "stramenopiles" ) ) { - return STRAMENOPILES_COLOR; - } - else if ( tax.equalsIgnoreCase( "alveolata" ) ) { - return ALVEOLATA_COLOR; - } - else if ( tax.equalsIgnoreCase( "rhizaria" ) ) { - return RHIZARIA_COLOR; - } - else if ( tax.equalsIgnoreCase( "excavata" ) ) { - return EXCAVATA_COLOR; - } - else if ( tax.equalsIgnoreCase( "apusozoa" ) ) { - return APUSOZOA_COLOR; - } - else if ( tax.equalsIgnoreCase( "archaea" ) ) { - return ARCHAEA_COLOR; - } - else if ( tax.equalsIgnoreCase( "bacteria" ) ) { - return BACTERIA_COLOR; - } - return null; + private ForesterUtil() { } }