From: cmzmasek@gmail.com Date: Wed, 23 Oct 2013 21:50:38 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=0898ccf757a9e1b3c10f0a8ad51829eb809d3062;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index cbe0dd4..ce11dce 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -2084,7 +2084,8 @@ public class surfacing { domain_similarity_print_option, scoring, true, - tax_code_to_id_map ); + tax_code_to_id_map, + intrees[ 0 ] ); simple_tab_writer.close(); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \"" + ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" ); @@ -2123,7 +2124,8 @@ public class surfacing { out_dir, write_pwc_files, tax_code_to_id_map, - CALC_SIMILARITY_SCORES ); + CALC_SIMILARITY_SCORES, + intrees[ 0 ] ); String matrix_output_file = new String( output_file.toString() ); if ( matrix_output_file.indexOf( '.' ) > 1 ) { matrix_output_file = matrix_output_file.substring( 0, matrix_output_file.indexOf( '.' ) ); diff --git a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java index 2fae940..eda7e55 100644 --- a/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java +++ b/forester/java/src/org/forester/io/parsers/HmmPfamOutputParser.java @@ -44,7 +44,6 @@ import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; import org.forester.protein.Protein; -import org.forester.surfacing.SurfacingUtil; import org.forester.util.ForesterUtil; public final class HmmPfamOutputParser { @@ -408,9 +407,9 @@ public final class HmmPfamOutputParser { if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT ) || isIgnoreEngulfedDomains() ) { final int domains_count = current_protein.getNumberOfProteinDomains(); - current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(), - isIgnoreEngulfedDomains(), - current_protein ); + current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(), + isIgnoreEngulfedDomains(), + current_protein ); final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains(); _domains_stored -= domains_removed; _domains_ignored_due_to_overlap += domains_removed; diff --git a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java index dfde916..f595349 100644 --- a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java +++ b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java @@ -45,7 +45,6 @@ import org.forester.protein.BasicDomain; import org.forester.protein.BasicProtein; import org.forester.protein.Domain; import org.forester.protein.Protein; -import org.forester.surfacing.SurfacingUtil; import org.forester.util.ForesterUtil; public final class HmmscanPerDomainTableParser { @@ -165,9 +164,9 @@ public final class HmmscanPerDomainTableParser { if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT ) || isIgnoreEngulfedDomains() ) { final int domains_count = current_protein.getNumberOfProteinDomains(); - current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(), - isIgnoreEngulfedDomains(), - current_protein ); + current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(), + isIgnoreEngulfedDomains(), + current_protein ); final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains(); _domains_stored -= domains_removed; _domains_ignored_due_to_overlap += domains_removed; diff --git a/forester/java/src/org/forester/surfacing/DomainSimilarity.java b/forester/java/src/org/forester/surfacing/DomainSimilarity.java index bf9cef4..5a0735e 100644 --- a/forester/java/src/org/forester/surfacing/DomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/DomainSimilarity.java @@ -30,6 +30,7 @@ import java.util.Map; import java.util.SortedMap; import java.util.SortedSet; +import org.forester.phylogeny.Phylogeny; import org.forester.species.Species; import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION; @@ -101,5 +102,7 @@ public interface DomainSimilarity extends Comparable { public double getStandardDeviationOfSimilarityScore(); - public StringBuffer toStringBuffer( PRINT_OPTION print_option, Map tax_code_to_id_map ); + public StringBuffer toStringBuffer( PRINT_OPTION print_option, + Map tax_code_to_id_map, + Phylogeny phy ); } diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java index eba55ae..4699869 100644 --- a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java +++ b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java @@ -43,6 +43,7 @@ import org.forester.evoinference.matrix.distance.DistanceMatrix; import org.forester.go.GoId; import org.forester.go.GoNameSpace; import org.forester.go.GoTerm; +import org.forester.phylogeny.Phylogeny; import org.forester.species.Species; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.util.DescriptiveStatistics; @@ -98,7 +99,8 @@ public class PairwiseGenomeComparator { final File out_dir, final boolean write_pairwise_comparisons, final Map tax_code_to_id_map, - final boolean calc_similarity_scores ) { + final boolean calc_similarity_scores, + Phylogeny phy ) { init(); final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes ); final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes ); @@ -215,7 +217,8 @@ public class PairwiseGenomeComparator { domain_similarity_print_option, scoring, false, - tax_code_to_id_map ); + tax_code_to_id_map, + phy ); } catch ( final IOException e ) { ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \"" diff --git a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java index 905e928..931fa32 100644 --- a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java @@ -26,6 +26,7 @@ package org.forester.surfacing; +import java.awt.Color; import java.util.List; import java.util.Map; import java.util.SortedMap; @@ -33,6 +34,8 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; import org.forester.species.Species; import org.forester.surfacing.DomainSimilarityCalculator.Detailedness; import org.forester.util.ForesterUtil; @@ -154,9 +157,10 @@ public class PrintableDomainSimilarity implements DomainSimilarity { private void addSpeciesSpecificDomainData( final StringBuffer sb, final Species species, final boolean html, - final Map tax_code_to_id_map ) { + final Map tax_code_to_id_map, + final Phylogeny phy ) { if ( html ) { - addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map ); + addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy ); } else { sb.append( species.getSpeciesId() ); @@ -180,12 +184,21 @@ public class PrintableDomainSimilarity implements DomainSimilarity { private void addTaxWithLink( final StringBuffer sb, final String tax_code, - final Map tax_code_to_id_map ) { + final Map tax_code_to_id_map, + final Phylogeny phy ) { + Color c = null; + if ( phy != null && !phy.isEmpty() ) { + c = getColorDependingOnTaxonomy( tax_code, phy ); + } + if ( c == null ) { + c = new Color( 0, 0, 0 ); + } + final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() ); sb.append( "" ); if ( !ForesterUtil.isEmpty( tax_code ) && ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) { sb.append( "" + tax_code + "" ); + + "\" target=\"taxonomy_window\" color=\"" + hex + "\">" + tax_code + "" ); } else { sb.append( tax_code ); @@ -193,6 +206,33 @@ public class PrintableDomainSimilarity implements DomainSimilarity { sb.append( "" ); } + private Color getColorDependingOnTaxonomy( final String tax_code, final Phylogeny phy ) { + List nodes = phy.getNodesViaTaxonomyCode( tax_code ); + Color c = null; + if ( nodes == null || nodes.isEmpty() ) { + throw new RuntimeException( tax_code + " is not found" ); + } + if ( nodes.size() != 1 ) { + throw new RuntimeException( tax_code + " is not unique" ); + } + PhylogenyNode n = nodes.get( 0 ); + while ( n != null ) { + c = null; + if ( n.getNodeData().isHasTaxonomy() + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { + c = SurfacingUtil.getColorForTaxCode( n.getNodeData().getTaxonomy().getScientificName() ); + } + if ( c == null && !ForesterUtil.isEmpty( n.getName() ) ) { + c = SurfacingUtil.getColorForTaxCode( n.getName() ); + } + if ( c != null ) { + break; + } + n = n.getParent(); + } + return c; + } + private int compareByDomainId( final DomainSimilarity other ) { return getDomainId().compareToIgnoreCase( other.getDomainId() ); } @@ -286,10 +326,11 @@ public class PrintableDomainSimilarity implements DomainSimilarity { } private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html, - final Map tax_code_to_id_map ) { + final Map tax_code_to_id_map, + final Phylogeny phy ) { final StringBuffer sb = new StringBuffer(); for( final Species species : getSpeciesData().keySet() ) { - addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map ); + addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy ); } return sb; } @@ -317,11 +358,13 @@ public class PrintableDomainSimilarity implements DomainSimilarity { return sb; } - private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map tax_code_to_id_map ) { + private StringBuffer getSpeciesDataInCustomOrder( final boolean html, + final Map tax_code_to_id_map, + final Phylogeny phy ) { final StringBuffer sb = new StringBuffer(); for( final Species order_species : getSpeciesCustomOrder() ) { if ( getSpeciesData().keySet().contains( order_species ) ) { - addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map ); + addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy ); } else { sb.append( PrintableDomainSimilarity.NO_SPECIES ); @@ -357,18 +400,19 @@ public class PrintableDomainSimilarity implements DomainSimilarity { @Override public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option, - final Map tax_code_to_id_map ) { + final Map tax_code_to_id_map, + Phylogeny phy ) { switch ( print_option ) { case SIMPLE_TAB_DELIMITED: return toStringBufferSimpleTabDelimited(); case HTML: - return toStringBufferDetailedHTML( tax_code_to_id_map ); + return toStringBufferDetailedHTML( tax_code_to_id_map, phy ); default: throw new AssertionError( "Unknown print option: " + print_option ); } } - private StringBuffer toStringBufferDetailedHTML( final Map tax_code_to_id_map ) { + private StringBuffer toStringBufferDetailedHTML( final Map tax_code_to_id_map, Phylogeny phy ) { final StringBuffer sb = new StringBuffer(); sb.append( "" ); sb.append( "" ); @@ -423,13 +467,13 @@ public class PrintableDomainSimilarity implements DomainSimilarity { } if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) { sb.append( "" ); - sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) ); + sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) ); sb.append( getDomainDataInAlphabeticalOrder() ); sb.append( "" ); } else { sb.append( "" ); - sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) ); + sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) ); sb.append( getDomainDataInAlphabeticalOrder() ); sb.append( "" ); } @@ -441,7 +485,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity { final StringBuffer sb = new StringBuffer(); sb.append( getDomainId() ); sb.append( "\t" ); - sb.append( getSpeciesDataInAlphabeticalOrder( false, null ) ); + sb.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) ); sb.append( "\n" ); return sb; } diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index ecb0839..27678c4 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -26,6 +26,7 @@ package org.forester.surfacing; +import java.awt.Color; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; @@ -191,16 +192,6 @@ public final class SurfacingUtil { return stats; } - public static int calculateOverlap( final Domain domain, final List covered_positions ) { - int overlap_count = 0; - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { - ++overlap_count; - } - } - return overlap_count; - } - public static void checkForOutputFileWriteability( final File outfile ) { final String error = ForesterUtil.isWritableFile( outfile ); if ( !ForesterUtil.isEmpty( error ) ) { @@ -1029,23 +1020,6 @@ public final class SurfacingUtil { return c; } - /** - * Returns true is Domain domain falls in an uninterrupted stretch of - * covered positions. - * - * @param domain - * @param covered_positions - * @return - */ - public static boolean isEngulfed( final Domain domain, final List covered_positions ) { - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { - return false; - } - } - return true; - } - public static void performDomainArchitectureAnalysis( final SortedMap> domain_architecutures, final SortedMap domain_architecuture_counts, final int min_count, @@ -1203,55 +1177,6 @@ public final class SurfacingUtil { return sb; } - /** - * - * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => - * domain with 0.3 is ignored - * - * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored - * - * - * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ - * ignored - * - * @param max_allowed_overlap - * maximal allowed overlap (inclusive) to be still considered not - * overlapping (zero or negative value to allow any overlap) - * @param remove_engulfed_domains - * to remove domains which are completely engulfed by coverage of - * domains with better support - * @param protein - * @return - */ - public static Protein removeOverlappingDomains( final int max_allowed_overlap, - final boolean remove_engulfed_domains, - final Protein protein ) { - final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies() - .getSpeciesId(), protein.getLength() ); - final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); - final List covered_positions = new ArrayList(); - for( final Domain domain : sorted ) { - if ( ( ( max_allowed_overlap < 0 ) || ( SurfacingUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) - && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { - final int covered_positions_size = covered_positions.size(); - for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { - covered_positions.add( false ); - } - final int new_covered_positions_size = covered_positions.size(); - for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { - if ( i < new_covered_positions_size ) { - covered_positions.set( i, true ); - } - else { - covered_positions.add( true ); - } - } - pruned_protein.addProteinDomain( domain ); - } - } - return pruned_protein; - } - public static List sortDomainsWithAscendingConfidenceValues( final Protein protein ) { final List domains = new ArrayList(); for( final Domain d : protein.getProteinDomains() ) { @@ -1677,8 +1602,8 @@ public final class SurfacingUtil { final PrintableDomainSimilarity.PRINT_OPTION print_option, final DomainSimilarity.DomainSimilarityScoring scoring, final boolean verbose, - final Map tax_code_to_id_map ) - throws IOException { + final Map tax_code_to_id_map, + Phylogeny phy ) throws IOException { if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) { split_writers = new HashMap(); split_writers.put( '_', single_writer ); @@ -1749,10 +1674,11 @@ public final class SurfacingUtil { } if ( simple_tab_writer != null ) { simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED, - tax_code_to_id_map ).toString() ); + tax_code_to_id_map, + null ).toString() ); } if ( single_writer != null ) { - single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map ).toString() ); + single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() ); single_writer.write( SurfacingConstants.NL ); } else { @@ -1761,7 +1687,7 @@ public final class SurfacingUtil { if ( local_writer == null ) { local_writer = split_writers.get( '0' ); } - local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map ).toString() ); + local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() ); local_writer.write( SurfacingConstants.NL ); } } @@ -2649,4 +2575,56 @@ public final class SurfacingUtil { return 0; } } + + final static Color getColorForTaxCode( final String tax ) { + if ( tax.equals( "Deuterostomia" ) ) { + return ForesterUtil.DEUTEROSTOMIA_COLOR; + } + else if ( tax.equals( "Protostomia" ) ) { + return ForesterUtil.PROTOSTOMIA_COLOR; + } + else if ( tax.equals( "Metazoa" ) ) { + return ForesterUtil.METAZOA_COLOR; + } + else if ( tax.equals( "Holozoa" ) ) { + return ForesterUtil.HOLOZOA_COLOR; + } + else if ( tax.equals( "Fungi" ) ) { + return ForesterUtil.FUNGI_COLOR; + } + else if ( tax.equals( "Holomycota" ) ) { + return ForesterUtil.HOLOMYCOTA_COLOR; + } + else if ( tax.equals( "Amoebozoa" ) ) { + return ForesterUtil.AMOEBOZOA_COLOR; + } + else if ( tax.equals( "Viridiplantae" ) ) { + return ForesterUtil.VIRIDPLANTAE_COLOR; + } + else if ( tax.equals( "Rhodophytaa" ) ) { + return ForesterUtil.RHODOPHYTA_COLOR; + } + else if ( tax.startsWith( "Hacrobia" ) ) { + return ForesterUtil.HACROBIA_COLOR; + } + else if ( tax.equals( "Stramenopiles" ) ) { + return ForesterUtil.STRAMENOPILES_COLOR; + } + else if ( tax.equals( "Alveolata" ) ) { + return ForesterUtil.ALVEOLATA_COLOR; + } + else if ( tax.equals( "Rhizaria" ) ) { + return ForesterUtil.RHIZARIA_COLOR; + } + else if ( tax.equals( "Excavata" ) ) { + return ForesterUtil.EXCAVATA_COLOR; + } + else if ( tax.equals( "Archaea" ) ) { + return ForesterUtil.ARCHAEA_COLOR; + } + else if ( tax.equals( "Bacteria" ) ) { + return ForesterUtil.BACTERIA_COLOR; + } + return null; + } } diff --git a/forester/java/src/org/forester/surfacing/TestSurfacing.java b/forester/java/src/org/forester/surfacing/TestSurfacing.java index c2b857c..168b6a4 100644 --- a/forester/java/src/org/forester/surfacing/TestSurfacing.java +++ b/forester/java/src/org/forester/surfacing/TestSurfacing.java @@ -147,18 +147,6 @@ public class TestSurfacing { return false; } System.out.println( "OK." ); - System.out.print( " Overlap removal: " ); - if ( !TestSurfacing.testOverlapRemoval() ) { - System.out.println( "failed." ); - return false; - } - System.out.println( "OK." ); - System.out.print( " Engulfing overlap removal: " ); - if ( !TestSurfacing.testEngulfingOverlapRemoval() ) { - System.out.println( "failed." ); - return false; - } - System.out.println( "OK." ); System.out.print( " Binary domain combination: " ); if ( !TestSurfacing.testBinaryDomainCombination() ) { System.out.println( "failed." ); @@ -4169,105 +4157,6 @@ public class TestSurfacing { return true; } - private static boolean testEngulfingOverlapRemoval() { - try { - final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 ); - final List covered = new ArrayList(); - covered.add( true ); // 0 - covered.add( false ); // 1 - covered.add( true ); // 2 - covered.add( false ); // 3 - covered.add( true ); // 4 - covered.add( true ); // 5 - covered.add( false ); // 6 - covered.add( true ); // 7 - covered.add( true ); // 8 - if ( SurfacingUtil.isEngulfed( d0, covered ) ) { - return false; - } - if ( SurfacingUtil.isEngulfed( d1, covered ) ) { - return false; - } - if ( SurfacingUtil.isEngulfed( d2, covered ) ) { - return false; - } - if ( !SurfacingUtil.isEngulfed( d3, covered ) ) { - return false; - } - if ( SurfacingUtil.isEngulfed( d4, covered ) ) { - return false; - } - if ( SurfacingUtil.isEngulfed( d5, covered ) ) { - return false; - } - if ( !SurfacingUtil.isEngulfed( d6, covered ) ) { - return false; - } - final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 ); - final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 ); - final Protein abc = new BasicProtein( "abc", "nemve", 0 ); - abc.addProteinDomain( a ); - abc.addProteinDomain( b ); - abc.addProteinDomain( c ); - final Protein abc_r1 = SurfacingUtil.removeOverlappingDomains( 3, false, abc ); - final Protein abc_r2 = SurfacingUtil.removeOverlappingDomains( 3, true, abc ); - if ( abc.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( abc_r1.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( abc_r2.getNumberOfProteinDomains() != 2 ) { - return false; - } - if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { - return false; - } - if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) { - return false; - } - final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 ); - final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 ); - final Protein def = new BasicProtein( "def", "nemve", 0 ); - def.addProteinDomain( d ); - def.addProteinDomain( e ); - def.addProteinDomain( f ); - final Protein def_r1 = SurfacingUtil.removeOverlappingDomains( 5, false, def ); - final Protein def_r2 = SurfacingUtil.removeOverlappingDomains( 5, true, def ); - if ( def.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( def_r1.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( def_r2.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) { - return false; - } - if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) { - return false; - } - if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) { - return false; - } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - private static boolean testGenomeWideCombinableDomains() { try { final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 ); @@ -4999,160 +4888,6 @@ public class TestSurfacing { return true; } - private static boolean testOverlapRemoval() { - try { - final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 ); - final List covered = new ArrayList(); - covered.add( true ); // 0 - covered.add( false ); // 1 - covered.add( true ); // 2 - covered.add( false ); // 3 - covered.add( true ); // 4 - covered.add( true ); // 5 - covered.add( false ); // 6 - covered.add( true ); // 7 - covered.add( true ); // 8 - if ( SurfacingUtil.calculateOverlap( d0, covered ) != 3 ) { - return false; - } - if ( SurfacingUtil.calculateOverlap( d1, covered ) != 2 ) { - return false; - } - if ( SurfacingUtil.calculateOverlap( d2, covered ) != 6 ) { - return false; - } - if ( SurfacingUtil.calculateOverlap( d3, covered ) != 0 ) { - return false; - } - if ( SurfacingUtil.calculateOverlap( d4, covered ) != 2 ) { - return false; - } - final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.01, 1 ); - final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); - final Protein ab = new BasicProtein( "ab", "varanus", 0 ); - ab.addProteinDomain( a ); - ab.addProteinDomain( b ); - final Protein ab_s0 = SurfacingUtil.removeOverlappingDomains( 3, false, ab ); - if ( ab.getNumberOfProteinDomains() != 2 ) { - return false; - } - if ( ab_s0.getNumberOfProteinDomains() != 1 ) { - return false; - } - if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { - return false; - } - final Protein ab_s1 = SurfacingUtil.removeOverlappingDomains( 4, false, ab ); - if ( ab.getNumberOfProteinDomains() != 2 ) { - return false; - } - if ( ab_s1.getNumberOfProteinDomains() != 2 ) { - return false; - } - final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 ); - final Domain d = new BasicDomain( "d", - ( short ) 10000, - ( short ) 10500, - ( short ) 1, - ( short ) 1, - 0.0000001, - 1 ); - final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 ); - final Protein cde = new BasicProtein( "cde", "varanus", 0 ); - cde.addProteinDomain( c ); - cde.addProteinDomain( d ); - cde.addProteinDomain( e ); - final Protein cde_s0 = SurfacingUtil.removeOverlappingDomains( 0, false, cde ); - if ( cde.getNumberOfProteinDomains() != 3 ) { - return false; - } - if ( cde_s0.getNumberOfProteinDomains() != 3 ) { - return false; - } - final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); - final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); - final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); - final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 ); - final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 ); - final Protein fghi = new BasicProtein( "fghi", "varanus", 0 ); - fghi.addProteinDomain( f ); - fghi.addProteinDomain( g ); - fghi.addProteinDomain( h ); - fghi.addProteinDomain( i ); - fghi.addProteinDomain( i ); - fghi.addProteinDomain( i ); - fghi.addProteinDomain( i2 ); - final Protein fghi_s0 = SurfacingUtil.removeOverlappingDomains( 10, false, fghi ); - if ( fghi.getNumberOfProteinDomains() != 7 ) { - return false; - } - if ( fghi_s0.getNumberOfProteinDomains() != 1 ) { - return false; - } - if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) { - return false; - } - final Protein fghi_s1 = SurfacingUtil.removeOverlappingDomains( 11, false, fghi ); - if ( fghi.getNumberOfProteinDomains() != 7 ) { - return false; - } - if ( fghi_s1.getNumberOfProteinDomains() != 7 ) { - return false; - } - final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); - final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); - final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); - final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 ); - final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 ); - final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 ); - final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); - final Protein jklm = new BasicProtein( "jklm", "varanus", 0 ); - jklm.addProteinDomain( j ); - jklm.addProteinDomain( k ); - jklm.addProteinDomain( l ); - jklm.addProteinDomain( m ); - jklm.addProteinDomain( m0 ); - jklm.addProteinDomain( m1 ); - jklm.addProteinDomain( m2 ); - final Protein jklm_s0 = SurfacingUtil.removeOverlappingDomains( 10, false, jklm ); - if ( jklm.getNumberOfProteinDomains() != 7 ) { - return false; - } - if ( jklm_s0.getNumberOfProteinDomains() != 1 ) { - return false; - } - if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) { - return false; - } - final Protein jklm_s1 = SurfacingUtil.removeOverlappingDomains( 11, false, jklm ); - if ( jklm.getNumberOfProteinDomains() != 7 ) { - return false; - } - if ( jklm_s1.getNumberOfProteinDomains() != 7 ) { - return false; - } - final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); - final Protein od = new BasicProtein( "od", "varanus", 0 ); - od.addProteinDomain( only ); - final Protein od_s0 = SurfacingUtil.removeOverlappingDomains( 0, false, od ); - if ( od.getNumberOfProteinDomains() != 1 ) { - return false; - } - if ( od_s0.getNumberOfProteinDomains() != 1 ) { - return false; - } - } - catch ( final Exception e ) { - e.printStackTrace( System.out ); - return false; - } - return true; - } - private static boolean testParsimony() { try { final BinaryStates X = BinaryStates.PRESENT; diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index eacd799..9326fd1 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -143,6 +143,259 @@ public final class Test { + ForesterConstants.PHYLO_XML_VERSION + "/" + ForesterConstants.PHYLO_XML_XSD; + public static boolean testOverlapRemoval() { + try { + final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final List covered = new ArrayList(); + covered.add( true ); // 0 + covered.add( false ); // 1 + covered.add( true ); // 2 + covered.add( false ); // 3 + covered.add( true ); // 4 + covered.add( true ); // 5 + covered.add( false ); // 6 + covered.add( true ); // 7 + covered.add( true ); // 8 + if ( ForesterUtil.calculateOverlap( d0, covered ) != 3 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d1, covered ) != 2 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d2, covered ) != 6 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d3, covered ) != 0 ) { + return false; + } + if ( ForesterUtil.calculateOverlap( d4, covered ) != 2 ) { + return false; + } + final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Protein ab = new BasicProtein( "ab", "varanus", 0 ); + ab.addProteinDomain( a ); + ab.addProteinDomain( b ); + final Protein ab_s0 = ForesterUtil.removeOverlappingDomains( 3, false, ab ); + if ( ab.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( ab_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { + return false; + } + final Protein ab_s1 = ForesterUtil.removeOverlappingDomains( 4, false, ab ); + if ( ab.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( ab_s1.getNumberOfProteinDomains() != 2 ) { + return false; + } + final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain d = new BasicDomain( "d", + ( short ) 10000, + ( short ) 10500, + ( short ) 1, + ( short ) 1, + 0.0000001, + 1 ); + final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Protein cde = new BasicProtein( "cde", "varanus", 0 ); + cde.addProteinDomain( c ); + cde.addProteinDomain( d ); + cde.addProteinDomain( e ); + final Protein cde_s0 = ForesterUtil.removeOverlappingDomains( 0, false, cde ); + if ( cde.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( cde_s0.getNumberOfProteinDomains() != 3 ) { + return false; + } + final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 ); + final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 ); + final Protein fghi = new BasicProtein( "fghi", "varanus", 0 ); + fghi.addProteinDomain( f ); + fghi.addProteinDomain( g ); + fghi.addProteinDomain( h ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i ); + fghi.addProteinDomain( i2 ); + final Protein fghi_s0 = ForesterUtil.removeOverlappingDomains( 10, false, fghi ); + if ( fghi.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( fghi_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) { + return false; + } + final Protein fghi_s1 = ForesterUtil.removeOverlappingDomains( 11, false, fghi ); + if ( fghi.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( fghi_s1.getNumberOfProteinDomains() != 7 ) { + return false; + } + final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 ); + final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 ); + final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 ); + final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 ); + final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 ); + final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 ); + final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); + final Protein jklm = new BasicProtein( "jklm", "varanus", 0 ); + jklm.addProteinDomain( j ); + jklm.addProteinDomain( k ); + jklm.addProteinDomain( l ); + jklm.addProteinDomain( m ); + jklm.addProteinDomain( m0 ); + jklm.addProteinDomain( m1 ); + jklm.addProteinDomain( m2 ); + final Protein jklm_s0 = ForesterUtil.removeOverlappingDomains( 10, false, jklm ); + if ( jklm.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( jklm_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) { + return false; + } + final Protein jklm_s1 = ForesterUtil.removeOverlappingDomains( 11, false, jklm ); + if ( jklm.getNumberOfProteinDomains() != 7 ) { + return false; + } + if ( jklm_s1.getNumberOfProteinDomains() != 7 ) { + return false; + } + final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 ); + final Protein od = new BasicProtein( "od", "varanus", 0 ); + od.addProteinDomain( only ); + final Protein od_s0 = ForesterUtil.removeOverlappingDomains( 0, false, od ); + if ( od.getNumberOfProteinDomains() != 1 ) { + return false; + } + if ( od_s0.getNumberOfProteinDomains() != 1 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + public static boolean testEngulfingOverlapRemoval() { + try { + final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 ); + final List covered = new ArrayList(); + covered.add( true ); // 0 + covered.add( false ); // 1 + covered.add( true ); // 2 + covered.add( false ); // 3 + covered.add( true ); // 4 + covered.add( true ); // 5 + covered.add( false ); // 6 + covered.add( true ); // 7 + covered.add( true ); // 8 + if ( ForesterUtil.isEngulfed( d0, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d1, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d2, covered ) ) { + return false; + } + if ( !ForesterUtil.isEngulfed( d3, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d4, covered ) ) { + return false; + } + if ( ForesterUtil.isEngulfed( d5, covered ) ) { + return false; + } + if ( !ForesterUtil.isEngulfed( d6, covered ) ) { + return false; + } + final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 ); + final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 ); + final Protein abc = new BasicProtein( "abc", "nemve", 0 ); + abc.addProteinDomain( a ); + abc.addProteinDomain( b ); + abc.addProteinDomain( c ); + final Protein abc_r1 = ForesterUtil.removeOverlappingDomains( 3, false, abc ); + final Protein abc_r2 = ForesterUtil.removeOverlappingDomains( 3, true, abc ); + if ( abc.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( abc_r1.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( abc_r2.getNumberOfProteinDomains() != 2 ) { + return false; + } + if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) { + return false; + } + if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) { + return false; + } + final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 ); + final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 ); + final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 ); + final Protein def = new BasicProtein( "def", "nemve", 0 ); + def.addProteinDomain( d ); + def.addProteinDomain( e ); + def.addProteinDomain( f ); + final Protein def_r1 = ForesterUtil.removeOverlappingDomains( 5, false, def ); + final Protein def_r2 = ForesterUtil.removeOverlappingDomains( 5, true, def ); + if ( def.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( def_r1.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( def_r2.getNumberOfProteinDomains() != 3 ) { + return false; + } + if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) { + return false; + } + if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) { + return false; + } + if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + public static boolean isEqual( final double a, final double b ) { return ( ( Math.abs( a - b ) ) < Test.ZERO_DIFF ); } @@ -248,7 +501,7 @@ public final class Test { failed++; } } - System.exit( 0 ); + ///////////////////////////////////////// System.exit( 0 ); System.out.print( "UniProtKB id extraction: " ); if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { System.out.println( "OK." ); @@ -288,6 +541,26 @@ public final class Test { System.out.println( "failed." ); failed++; } + // + System.out.print( "Overlap removal: " ); + if ( !org.forester.test.Test.testOverlapRemoval() ) { + System.out.println( "failed." ); + failed++; + } + else { + succeeded++; + } + System.out.println( "OK." ); + System.out.print( "Engulfing overlap removal: " ); + if ( !Test.testEngulfingOverlapRemoval() ) { + System.out.println( "failed." ); + failed++; + } + else { + succeeded++; + } + System.out.println( "OK." ); + // System.out.print( "Taxonomy code extraction: " ); if ( Test.testExtractTaxonomyCodeFromNodeName() ) { System.out.println( "OK." ); @@ -11186,14 +11459,14 @@ public final class Test { System.out.println( entry4.getGeneName() ); return false; } - if ( !entry4.getChromosome().equals( "ras" ) ) { - System.out.println( entry4.getChromosome() ); - return false; - } - if ( !entry4.getMap().equals( "ras" ) ) { - System.out.println( entry4.getMap() ); - return false; - } + // if ( !entry4.getChromosome().equals( "ras" ) ) { + // System.out.println( entry4.getChromosome() ); + // return false; + // } + // if ( !entry4.getMap().equals( "ras" ) ) { + // System.out.println( entry4.getMap() ); + // return false; + // } // //TODO fails: // final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" ); diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index 43700ef..4b8da89 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -67,6 +67,10 @@ import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Distribution; import org.forester.phylogeny.data.Sequence; import org.forester.phylogeny.data.Taxonomy; +import org.forester.protein.BasicProtein; +import org.forester.protein.Domain; +import org.forester.protein.Protein; +import org.forester.surfacing.SurfacingUtil; public final class ForesterUtil { @@ -88,6 +92,22 @@ public final class ForesterUtil { public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/"; public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/"; public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:"; + public final static Color DEUTEROSTOMIA_COLOR = new Color( 255, 0, 0 ); + public final static Color PROTOSTOMIA_COLOR = new Color( 204, 0, 0 ); + public final static Color METAZOA_COLOR = new Color( 204, 0, 102 ); + public final static Color HOLOZOA_COLOR = new Color( 127, 0, 255 ); + public final static Color FUNGI_COLOR = new Color( 255, 128, 0 ); + public final static Color HOLOMYCOTA_COLOR = new Color( 204, 102, 0 ); + public final static Color AMOEBOZOA_COLOR = new Color( 255, 0, 255 ); + public final static Color VIRIDPLANTAE_COLOR = new Color( 0, 255, 0 ); + public final static Color RHODOPHYTA_COLOR = new Color( 0, 153, 76 ); + public final static Color HACROBIA_COLOR = new Color( 0, 102, 51 ); + public final static Color STRAMENOPILES_COLOR = new Color( 0, 0, 255 ); + public final static Color ALVEOLATA_COLOR = new Color( 0, 128, 255 ); + public final static Color RHIZARIA_COLOR = new Color( 0, 255, 255 ); + public final static Color EXCAVATA_COLOR = new Color( 204, 204, 0 ); + public final static Color ARCHAEA_COLOR = new Color( 160, 160, 160 ); + public final static Color BACTERIA_COLOR = new Color( 64, 64, 64 ); static { final DecimalFormatSymbols dfs = new DecimalFormatSymbols(); dfs.setDecimalSeparator( '.' ); @@ -101,6 +121,16 @@ public final class ForesterUtil { private ForesterUtil() { } + public static int calculateOverlap( final Domain domain, final List covered_positions ) { + int overlap_count = 0; + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) { + ++overlap_count; + } + } + return overlap_count; + } + final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) { if ( sb.length() > 0 ) { sb.append( separator ); @@ -108,6 +138,72 @@ public final class ForesterUtil { } /** + * + * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 => + * domain with 0.3 is ignored + * + * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored + * + * + * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_ + * ignored + * + * @param max_allowed_overlap + * maximal allowed overlap (inclusive) to be still considered not + * overlapping (zero or negative value to allow any overlap) + * @param remove_engulfed_domains + * to remove domains which are completely engulfed by coverage of + * domains with better support + * @param protein + * @return + */ + public static Protein removeOverlappingDomains( final int max_allowed_overlap, + final boolean remove_engulfed_domains, + final Protein protein ) { + final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies() + .getSpeciesId(), protein.getLength() ); + final List sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein ); + final List covered_positions = new ArrayList(); + for( final Domain domain : sorted ) { + if ( ( ( max_allowed_overlap < 0 ) || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) ) + && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) { + final int covered_positions_size = covered_positions.size(); + for( int i = covered_positions_size; i < domain.getFrom(); ++i ) { + covered_positions.add( false ); + } + final int new_covered_positions_size = covered_positions.size(); + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( i < new_covered_positions_size ) { + covered_positions.set( i, true ); + } + else { + covered_positions.add( true ); + } + } + pruned_protein.addProteinDomain( domain ); + } + } + return pruned_protein; + } + + /** + * Returns true is Domain domain falls in an uninterrupted stretch of + * covered positions. + * + * @param domain + * @param covered_positions + * @return + */ + public static boolean isEngulfed( final Domain domain, final List covered_positions ) { + for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { + if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) { + return false; + } + } + return true; + } + + /** * This calculates a color. If value is equal to min the returned color is * minColor, if value is equal to max the returned color is maxColor, * otherwise a color 'proportional' to value is returned. diff --git a/forester/java/src/org/forester/ws/hmmer/Test.java b/forester/java/src/org/forester/ws/hmmer/Test.java index 7d53e61..f040d2b 100644 --- a/forester/java/src/org/forester/ws/hmmer/Test.java +++ b/forester/java/src/org/forester/ws/hmmer/Test.java @@ -8,6 +8,7 @@ import java.net.HttpURLConnection; import java.net.URL; import java.net.URLEncoder; + public class Test { public static void main( final String[] args ) {