X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Futil%2FForesterUtil.java;h=8036c2d8342d5ea79b84b35ba12ee8947262a2c1;hb=d4d61f9d2969283e821f650d031c169899fb3870;hp=caf4045693cb35b9fc00973926ba36762cae7804;hpb=656be28debec520e0e35a8b311114398a40ea366;p=jalview.git diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index caf4045..8036c2d 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -59,12 +59,18 @@ import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.forester.archaeopteryx.Constants; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Distribution; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; +import org.forester.protein.BasicProtein; +import org.forester.protein.Domain; +import org.forester.protein.Protein; +import org.forester.surfacing.SurfacingUtil; public final class ForesterUtil { @@ -82,6 +88,28 @@ public final class ForesterUtil { public static final NumberFormat FORMATTER_6; public static final NumberFormat FORMATTER_06; public static final NumberFormat FORMATTER_3; + public static final String NCBI_PROTEIN = "http://www.ncbi.nlm.nih.gov/protein/"; + public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; + public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; + public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; + public final static Color DEUTEROSTOMIA_COLOR = new Color( 255, 0, 0 ); + public final static Color PROTOSTOMIA_COLOR = new Color( 204, 0, 0 ); + public final static Color METAZOA_COLOR = new Color( 204, 0, 102 ); + public final static Color HOLOZOA_COLOR = new Color( 127, 0, 255 ); + public final static Color FUNGI_COLOR = new Color( 255, 153, 0 ); + public final static Color HOLOMYCOTA_COLOR = new Color( 204, 102, 0 ); + public final static Color AMOEBOZOA_COLOR = new Color( 255, 0, 255 ); + public final static Color VIRIDPLANTAE_COLOR = new Color( 0, 255, 0 ); + public final static Color RHODOPHYTA_COLOR = new Color( 0, 153, 76 ); + public final static Color HACROBIA_COLOR = new Color( 0, 102, 51 ); + public final static Color GLAUCOPHYTA_COLOR = new Color( 0, 102, 51 ); + public final static Color STRAMENOPILES_COLOR = new Color( 0, 0, 255 ); + public final static Color ALVEOLATA_COLOR = new Color( 0, 128, 255 ); + public final static Color RHIZARIA_COLOR = new Color( 0, 255, 255 ); + public static final Color APUSOZOA_COLOR = new Color( 204, 255, 255 ); + public final static Color EXCAVATA_COLOR = new Color( 204, 204, 0 ); + public final static Color ARCHAEA_COLOR = new Color( 160, 160, 160 ); + public final static Color BACTERIA_COLOR = new Color( 64, 64, 64 ); static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); dfs.setDecimalSeparator( '.' ); @@ -95,6 +123,16 @@ public final class ForesterUtil { private ForesterUtil() { } + public static int calculateOverlap( final Domain domain, final List covered_positions ) { + int overlap_count = 0; + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { + ++overlap_count; + } + } + return overlap_count; + } + final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { if ( sb.length() > 0 ) { sb.append( separator ); @@ -102,6 +140,72 @@ public final class ForesterUtil { } /** + * + * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => + * domain with 0.3 is ignored + * + * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored + * + * + * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ + * ignored + * + * @param max_allowed_overlap + * maximal allowed overlap (inclusive) to be still considered not + * overlapping (zero or negative value to allow any overlap) + * @param remove_engulfed_domains + * to remove domains which are completely engulfed by coverage of + * domains with better support + * @param protein + * @return + */ + public static Protein removeOverlappingDomains( final int max_allowed_overlap, + final boolean remove_engulfed_domains, + final Protein protein ) { + final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies() + .getSpeciesId(), protein.getLength() ); + final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); + final List covered_positions = new ArrayList(); + for( final Domain domain : sorted ) { + if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) + && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { + final int covered_positions_size = covered_positions.size(); + for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { + covered_positions.add( false ); + } + final int new_covered_positions_size = covered_positions.size(); + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( i < new_covered_positions_size ) { + covered_positions.set( i, true ); + } + else { + covered_positions.add( true ); + } + } + pruned_protein.addProteinDomain( domain ); + } + } + return pruned_protein; + } + + /** + * Returns true is Domain domain falls in an uninterrupted stretch of + * covered positions. + * + * @param domain + * @param covered_positions + * @return + */ + public static boolean isEngulfed( final Domain domain, final List covered_positions ) { + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { + return false; + } + } + return true; + } + + /** * This calculates a color. If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, * otherwise a color 'proportional' to value is returned. @@ -349,6 +453,34 @@ public final class ForesterUtil { return ary; } + public static String[][] file22dArray( final File file ) throws IOException { + final List list = new ArrayList(); + final BufferedReader in = new BufferedReader( new FileReader( file ) ); + String str; + while ( ( str = in.readLine() ) != null ) { + str = str.trim(); + if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) { + list.add( str ); + } + } + in.close(); + final String[][] ary = new String[ list.size() ][ 2 ]; + final Pattern pa = Pattern.compile( "(\\S+)\\s+(\\S+)" ); + int i = 0; + for( final String s : list ) { + final Matcher m = pa.matcher( s ); + if ( m.matches() ) { + ary[ i ][ 0 ] = m.group( 1 ); + ary[ i ][ 1 ] = m.group( 2 ); + ++i; + } + else { + throw new IOException( "unexpcted format: " + s ); + } + } + return ary; + } + final public static List file2list( final File file ) throws IOException { final List list = new ArrayList(); final BufferedReader in = new BufferedReader( new FileReader( file ) ); @@ -556,8 +688,24 @@ public final class ForesterUtil { return isReadableFile( new File( s ) ); } - public static boolean isWindowns() { - return ForesterUtil.OS_NAME.toLowerCase().indexOf( "win" ) > -1; + public final static boolean isWindows() { + try { + return OS_NAME.toLowerCase().indexOf( "win" ) > -1; + } + catch ( final Exception e ) { + ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e ); + return false; + } + } + + public final static boolean isMac() { + try { + return OS_NAME.toLowerCase().startsWith( "mac" ); + } + catch ( final Exception e ) { + ForesterUtil.printWarningMessage( Constants.PRG_NAME, "minor error: " + e ); + return false; + } } final public static String isWritableFile( final File f ) { @@ -996,6 +1144,10 @@ public final class ForesterUtil { System.out.print( "]" ); } + public final static void updateProgress( final int i, final DecimalFormat f ) { + System.out.print( "\r[" + f.format( i ) + "]" ); + } + public final static String wordWrap( final String str, final int width ) { final StringBuilder sb = new StringBuilder( str ); int start = 0; @@ -1071,4 +1223,62 @@ public final class ForesterUtil { System.err.println(); System.exit( -1 ); } + + public final static Color obtainColorDependingOnTaxonomyGroup( final String tax ) { + if ( tax.equalsIgnoreCase( "deuterostomia" ) ) { + return DEUTEROSTOMIA_COLOR; + } + else if ( tax.equalsIgnoreCase( "protostomia" ) ) { + return PROTOSTOMIA_COLOR; + } + else if ( tax.equalsIgnoreCase( "metazoa" ) ) { + return METAZOA_COLOR; + } + else if ( tax.equalsIgnoreCase( "holozoa" ) ) { + return HOLOZOA_COLOR; + } + else if ( tax.equalsIgnoreCase( "fungi" ) ) { + return FUNGI_COLOR; + } + else if ( tax.equalsIgnoreCase( "holomycota" ) ) { + return HOLOMYCOTA_COLOR; + } + else if ( tax.equalsIgnoreCase( "amoebozoa" ) ) { + return AMOEBOZOA_COLOR; + } + else if ( tax.equalsIgnoreCase( "viridiplantae" ) ) { + return VIRIDPLANTAE_COLOR; + } + else if ( tax.equalsIgnoreCase( "rhodophyta" ) ) { + return RHODOPHYTA_COLOR; + } + else if ( tax.toLowerCase().startsWith( "hacrobia" ) ) { + return HACROBIA_COLOR; + } + else if ( tax.equalsIgnoreCase( "glaucocystophyceae" ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { + return GLAUCOPHYTA_COLOR; + } + else if ( tax.equalsIgnoreCase( "stramenopiles" ) ) { + return STRAMENOPILES_COLOR; + } + else if ( tax.equalsIgnoreCase( "alveolata" ) ) { + return ALVEOLATA_COLOR; + } + else if ( tax.equalsIgnoreCase( "rhizaria" ) ) { + return RHIZARIA_COLOR; + } + else if ( tax.equalsIgnoreCase( "excavata" ) ) { + return EXCAVATA_COLOR; + } + else if ( tax.equalsIgnoreCase( "apusozoa" ) ) { + return APUSOZOA_COLOR; + } + else if ( tax.equalsIgnoreCase( "archaea" ) ) { + return ARCHAEA_COLOR; + } + else if ( tax.equalsIgnoreCase( "bacteria" ) ) { + return BACTERIA_COLOR; + } + return null; + } }