domain_similarity_print_option,
scoring,
true,
- tax_code_to_id_map );
+ tax_code_to_id_map,
+ intrees[ 0 ] );
simple_tab_writer.close();
ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \""
+ ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" );
out_dir,
write_pwc_files,
tax_code_to_id_map,
- CALC_SIMILARITY_SCORES );
+ CALC_SIMILARITY_SCORES,
+ intrees[ 0 ] );
String matrix_output_file = new String( output_file.toString() );
if ( matrix_output_file.indexOf( '.' ) > 1 ) {
matrix_output_file = matrix_output_file.substring( 0, matrix_output_file.indexOf( '.' ) );
import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
import org.forester.protein.Protein;
-import org.forester.surfacing.SurfacingUtil;
import org.forester.util.ForesterUtil;
public final class HmmPfamOutputParser {
if ( ( getMaxAllowedOverlap() != HmmPfamOutputParser.MAX_ALLOWED_OVERLAP_DEFAULT )
|| isIgnoreEngulfedDomains() ) {
final int domains_count = current_protein.getNumberOfProteinDomains();
- current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
- isIgnoreEngulfedDomains(),
- current_protein );
+ current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+ isIgnoreEngulfedDomains(),
+ current_protein );
final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
_domains_stored -= domains_removed;
_domains_ignored_due_to_overlap += domains_removed;
import org.forester.protein.BasicProtein;
import org.forester.protein.Domain;
import org.forester.protein.Protein;
-import org.forester.surfacing.SurfacingUtil;
import org.forester.util.ForesterUtil;
public final class HmmscanPerDomainTableParser {
if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT )
|| isIgnoreEngulfedDomains() ) {
final int domains_count = current_protein.getNumberOfProteinDomains();
- current_protein = SurfacingUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
- isIgnoreEngulfedDomains(),
- current_protein );
+ current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
+ isIgnoreEngulfedDomains(),
+ current_protein );
final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
_domains_stored -= domains_removed;
_domains_ignored_due_to_overlap += domains_removed;
import java.util.SortedMap;
import java.util.SortedSet;
+import org.forester.phylogeny.Phylogeny;
import org.forester.species.Species;
import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
public double getStandardDeviationOfSimilarityScore();
- public StringBuffer toStringBuffer( PRINT_OPTION print_option, Map<String, Integer> tax_code_to_id_map );
+ public StringBuffer toStringBuffer( PRINT_OPTION print_option,
+ Map<String, Integer> tax_code_to_id_map,
+ Phylogeny phy );
}
import org.forester.go.GoId;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
+import org.forester.phylogeny.Phylogeny;
import org.forester.species.Species;
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
import org.forester.util.DescriptiveStatistics;
final File out_dir,
final boolean write_pairwise_comparisons,
final Map<String, Integer> tax_code_to_id_map,
- final boolean calc_similarity_scores ) {
+ final boolean calc_similarity_scores,
+ Phylogeny phy ) {
init();
final BasicSymmetricalDistanceMatrix domain_distance_scores_means = new BasicSymmetricalDistanceMatrix( number_of_genomes );
final BasicSymmetricalDistanceMatrix shared_domains_based_distances = new BasicSymmetricalDistanceMatrix( number_of_genomes );
domain_similarity_print_option,
scoring,
false,
- tax_code_to_id_map );
+ tax_code_to_id_map,
+ phy );
}
catch ( final IOException e ) {
ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \""
package org.forester.surfacing;
+import java.awt.Color;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
import org.forester.species.Species;
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
import org.forester.util.ForesterUtil;
private void addSpeciesSpecificDomainData( final StringBuffer sb,
final Species species,
final boolean html,
- final Map<String, Integer> tax_code_to_id_map ) {
+ final Map<String, Integer> tax_code_to_id_map,
+ final Phylogeny phy ) {
if ( html ) {
- addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map );
+ addTaxWithLink( sb, species.getSpeciesId(), tax_code_to_id_map, phy );
}
else {
sb.append( species.getSpeciesId() );
+ /**
+ * Appends the given taxonomy code to sb, wrapped in bold tags. If the code has an
+ * entry in tax_code_to_id_map, it is written as a hyperlink built from
+ * SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK and colored with the color found for
+ * the code in phy; otherwise the plain code is appended.
+ *
+ * @param sb buffer the HTML is appended to
+ * @param tax_code taxonomy code to print
+ * @param tax_code_to_id_map maps taxonomy codes to the ids used in the link (may be null)
+ * @param phy phylogeny used to look up the color; if null or empty, or if no color
+ * is found, black is used
+ */
private void addTaxWithLink( final StringBuffer sb,
final String tax_code,
- final Map<String, Integer> tax_code_to_id_map ) {
+ final Map<String, Integer> tax_code_to_id_map,
+ final Phylogeny phy ) {
+ Color c = null;
+ if ( phy != null && !phy.isEmpty() ) {
+ c = getColorDependingOnTaxonomy( tax_code, phy );
+ }
+ if ( c == null ) {
+ c = new Color( 0, 0, 0 );
+ }
+ final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
sb.append( "<b>" );
if ( !ForesterUtil.isEmpty( tax_code )
&& ( ( tax_code_to_id_map != null ) && tax_code_to_id_map.containsKey( tax_code ) ) ) {
+ // The anchor element has no "color" attribute, so the color is set via an inline style.
sb.append( "<a href=\"" + SurfacingConstants.UNIPROT_TAXONOMY_ID_LINK + tax_code_to_id_map.get( tax_code )
- + "\" target=\"taxonomy_window\">" + tax_code + "</a>" );
+ + "\" target=\"taxonomy_window\" style=\"color:" + hex + "\">" + tax_code + "</a>" );
}
else {
sb.append( tax_code );
sb.append( "</b>" );
}
+ /**
+ * Looks up the single node carrying tax_code in phy and walks from that node
+ * towards the root until a node is found whose taxonomy scientific name or, failing
+ * that, whose node name is mapped to a color by SurfacingUtil.getColorForTaxCode.
+ *
+ * @param tax_code taxonomy code to locate in the phylogeny
+ * @param phy phylogeny to search
+ * @return the color of the closest matching node on the path to the root, or null
+ * if no node on that path yields a color
+ * @throws RuntimeException if tax_code is not present in phy, or is present more than once
+ */
+ private Color getColorDependingOnTaxonomy( final String tax_code, final Phylogeny phy ) {
+ final List<PhylogenyNode> matches = phy.getNodesViaTaxonomyCode( tax_code );
+ if ( ( matches == null ) || matches.isEmpty() ) {
+ throw new RuntimeException( tax_code + " is not found" );
+ }
+ if ( matches.size() != 1 ) {
+ throw new RuntimeException( tax_code + " is not unique" );
+ }
+ for( PhylogenyNode node = matches.get( 0 ); node != null; node = node.getParent() ) {
+ Color color = null;
+ // The scientific name of the node's taxonomy is tried first ...
+ if ( node.getNodeData().isHasTaxonomy() ) {
+ final String scientific_name = node.getNodeData().getTaxonomy().getScientificName();
+ if ( !ForesterUtil.isEmpty( scientific_name ) ) {
+ color = SurfacingUtil.getColorForTaxCode( scientific_name );
+ }
+ }
+ // ... and the plain node name is the fallback.
+ if ( ( color == null ) && !ForesterUtil.isEmpty( node.getName() ) ) {
+ color = SurfacingUtil.getColorForTaxCode( node.getName() );
+ }
+ if ( color != null ) {
+ return color;
+ }
+ }
+ return null;
+ }
+
private int compareByDomainId( final DomainSimilarity other ) {
return getDomainId().compareToIgnoreCase( other.getDomainId() );
}
}
private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html,
- final Map<String, Integer> tax_code_to_id_map ) {
+ final Map<String, Integer> tax_code_to_id_map,
+ final Phylogeny phy ) {
final StringBuffer sb = new StringBuffer();
for( final Species species : getSpeciesData().keySet() ) {
- addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map );
+ addSpeciesSpecificDomainData( sb, species, html, tax_code_to_id_map, phy );
}
return sb;
}
return sb;
}
- private StringBuffer getSpeciesDataInCustomOrder( final boolean html, final Map<String, Integer> tax_code_to_id_map ) {
+ private StringBuffer getSpeciesDataInCustomOrder( final boolean html,
+ final Map<String, Integer> tax_code_to_id_map,
+ final Phylogeny phy ) {
final StringBuffer sb = new StringBuffer();
for( final Species order_species : getSpeciesCustomOrder() ) {
if ( getSpeciesData().keySet().contains( order_species ) ) {
- addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map );
+ addSpeciesSpecificDomainData( sb, order_species, html, tax_code_to_id_map, phy );
}
else {
sb.append( PrintableDomainSimilarity.NO_SPECIES );
@Override
public StringBuffer toStringBuffer( final PrintableDomainSimilarity.PRINT_OPTION print_option,
- final Map<String, Integer> tax_code_to_id_map ) {
+ final Map<String, Integer> tax_code_to_id_map,
+ Phylogeny phy ) {
switch ( print_option ) {
case SIMPLE_TAB_DELIMITED:
return toStringBufferSimpleTabDelimited();
case HTML:
- return toStringBufferDetailedHTML( tax_code_to_id_map );
+ return toStringBufferDetailedHTML( tax_code_to_id_map, phy );
default:
throw new AssertionError( "Unknown print option: " + print_option );
}
}
- private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map ) {
+ private StringBuffer toStringBufferDetailedHTML( final Map<String, Integer> tax_code_to_id_map, Phylogeny phy ) {
final StringBuffer sb = new StringBuffer();
sb.append( "<tr>" );
sb.append( "<td>" );
}
if ( ( getSpeciesCustomOrder() == null ) || getSpeciesCustomOrder().isEmpty() ) {
sb.append( "<td>" );
- sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map ) );
+ sb.append( getSpeciesDataInAlphabeticalOrder( true, tax_code_to_id_map, phy ) );
sb.append( getDomainDataInAlphabeticalOrder() );
sb.append( "</td>" );
}
else {
sb.append( "<td>" );
- sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map ) );
+ sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) );
sb.append( getDomainDataInAlphabeticalOrder() );
sb.append( "</td>" );
}
final StringBuffer sb = new StringBuffer();
sb.append( getDomainId() );
sb.append( "\t" );
- sb.append( getSpeciesDataInAlphabeticalOrder( false, null ) );
+ sb.append( getSpeciesDataInAlphabeticalOrder( false, null, null ) );
sb.append( "\n" );
return sb;
}
package org.forester.surfacing;
+import java.awt.Color;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
return stats;
}
- public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
- int overlap_count = 0;
- for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
- if ( ( i < covered_positions.size() ) && ( covered_positions.get( i ) == true ) ) {
- ++overlap_count;
- }
- }
- return overlap_count;
- }
-
public static void checkForOutputFileWriteability( final File outfile ) {
final String error = ForesterUtil.isWritableFile( outfile );
if ( !ForesterUtil.isEmpty( error ) ) {
return c;
}
- /**
- * Returns true is Domain domain falls in an uninterrupted stretch of
- * covered positions.
- *
- * @param domain
- * @param covered_positions
- * @return
- */
- public static boolean isEngulfed( final Domain domain, final List<Boolean> covered_positions ) {
- for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
- if ( ( i >= covered_positions.size() ) || ( covered_positions.get( i ) != true ) ) {
- return false;
- }
- }
- return true;
- }
-
public static void performDomainArchitectureAnalysis( final SortedMap<String, Set<String>> domain_architecutures,
final SortedMap<String, Integer> domain_architecuture_counts,
final int min_count,
return sb;
}
- /**
- *
- * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 =>
- * domain with 0.3 is ignored
- *
- * -----------0.1 ----------0.2 --0.3 => domain with 0.3 is ignored
- *
- *
- * ------------0.1 ----------0.3 --0.2 => domains with 0.3 and 0.2 are _not_
- * ignored
- *
- * @param max_allowed_overlap
- * maximal allowed overlap (inclusive) to be still considered not
- * overlapping (zero or negative value to allow any overlap)
- * @param remove_engulfed_domains
- * to remove domains which are completely engulfed by coverage of
- * domains with better support
- * @param protein
- * @return
- */
- public static Protein removeOverlappingDomains( final int max_allowed_overlap,
- final boolean remove_engulfed_domains,
- final Protein protein ) {
- final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies()
- .getSpeciesId(), protein.getLength() );
- final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
- final List<Boolean> covered_positions = new ArrayList<Boolean>();
- for( final Domain domain : sorted ) {
- if ( ( ( max_allowed_overlap < 0 ) || ( SurfacingUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap ) )
- && ( !remove_engulfed_domains || !isEngulfed( domain, covered_positions ) ) ) {
- final int covered_positions_size = covered_positions.size();
- for( int i = covered_positions_size; i < domain.getFrom(); ++i ) {
- covered_positions.add( false );
- }
- final int new_covered_positions_size = covered_positions.size();
- for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
- if ( i < new_covered_positions_size ) {
- covered_positions.set( i, true );
- }
- else {
- covered_positions.add( true );
- }
- }
- pruned_protein.addProteinDomain( domain );
- }
- }
- return pruned_protein;
- }
-
public static List<Domain> sortDomainsWithAscendingConfidenceValues( final Protein protein ) {
final List<Domain> domains = new ArrayList<Domain>();
for( final Domain d : protein.getProteinDomains() ) {
final PrintableDomainSimilarity.PRINT_OPTION print_option,
final DomainSimilarity.DomainSimilarityScoring scoring,
final boolean verbose,
- final Map<String, Integer> tax_code_to_id_map )
- throws IOException {
+ final Map<String, Integer> tax_code_to_id_map,
+ Phylogeny phy ) throws IOException {
if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
split_writers = new HashMap<Character, Writer>();
split_writers.put( '_', single_writer );
}
if ( simple_tab_writer != null ) {
simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED,
- tax_code_to_id_map ).toString() );
+ tax_code_to_id_map,
+ null ).toString() );
}
if ( single_writer != null ) {
- single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map ).toString() );
+ single_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() );
single_writer.write( SurfacingConstants.NL );
}
else {
if ( local_writer == null ) {
local_writer = split_writers.get( '0' );
}
- local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map ).toString() );
+ local_writer.write( similarity.toStringBuffer( print_option, tax_code_to_id_map, phy ).toString() );
local_writer.write( SurfacingConstants.NL );
}
}
return 0;
}
}
+
+ /**
+ * Maps the name of a major taxonomic group to its display color.
+ *
+ * @param tax name of a taxonomic group, for example "Metazoa" or "Fungi"
+ * @return the matching color constant from ForesterUtil, or null if tax is null
+ * or is not one of the groups listed below
+ */
+ final static Color getColorForTaxCode( final String tax ) {
+ if ( tax == null ) {
+ return null;
+ }
+ if ( tax.equals( "Deuterostomia" ) ) {
+ return ForesterUtil.DEUTEROSTOMIA_COLOR;
+ }
+ else if ( tax.equals( "Protostomia" ) ) {
+ return ForesterUtil.PROTOSTOMIA_COLOR;
+ }
+ else if ( tax.equals( "Metazoa" ) ) {
+ return ForesterUtil.METAZOA_COLOR;
+ }
+ else if ( tax.equals( "Holozoa" ) ) {
+ return ForesterUtil.HOLOZOA_COLOR;
+ }
+ else if ( tax.equals( "Fungi" ) ) {
+ return ForesterUtil.FUNGI_COLOR;
+ }
+ else if ( tax.equals( "Holomycota" ) ) {
+ return ForesterUtil.HOLOMYCOTA_COLOR;
+ }
+ else if ( tax.equals( "Amoebozoa" ) ) {
+ return ForesterUtil.AMOEBOZOA_COLOR;
+ }
+ else if ( tax.equals( "Viridiplantae" ) ) {
+ return ForesterUtil.VIRIDPLANTAE_COLOR;
+ }
+ else if ( tax.equals( "Rhodophyta" ) ) {
+ // Was misspelled "Rhodophytaa", so red algae never received their color.
+ return ForesterUtil.RHODOPHYTA_COLOR;
+ }
+ else if ( tax.startsWith( "Hacrobia" ) ) {
+ // Prefix match: any name beginning with "Hacrobia" is accepted.
+ return ForesterUtil.HACROBIA_COLOR;
+ }
+ else if ( tax.equals( "Stramenopiles" ) ) {
+ return ForesterUtil.STRAMENOPILES_COLOR;
+ }
+ else if ( tax.equals( "Alveolata" ) ) {
+ return ForesterUtil.ALVEOLATA_COLOR;
+ }
+ else if ( tax.equals( "Rhizaria" ) ) {
+ return ForesterUtil.RHIZARIA_COLOR;
+ }
+ else if ( tax.equals( "Excavata" ) ) {
+ return ForesterUtil.EXCAVATA_COLOR;
+ }
+ else if ( tax.equals( "Archaea" ) ) {
+ return ForesterUtil.ARCHAEA_COLOR;
+ }
+ else if ( tax.equals( "Bacteria" ) ) {
+ return ForesterUtil.BACTERIA_COLOR;
+ }
+ return null;
+ }
}
return false;
}
System.out.println( "OK." );
- System.out.print( " Overlap removal: " );
- if ( !TestSurfacing.testOverlapRemoval() ) {
- System.out.println( "failed." );
- return false;
- }
- System.out.println( "OK." );
- System.out.print( " Engulfing overlap removal: " );
- if ( !TestSurfacing.testEngulfingOverlapRemoval() ) {
- System.out.println( "failed." );
- return false;
- }
- System.out.println( "OK." );
System.out.print( " Binary domain combination: " );
if ( !TestSurfacing.testBinaryDomainCombination() ) {
System.out.println( "failed." );
return true;
}
- private static boolean testEngulfingOverlapRemoval() {
- try {
- final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d5 = new BasicDomain( "d4", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d6 = new BasicDomain( "d4", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 );
- final List<Boolean> covered = new ArrayList<Boolean>();
- covered.add( true ); // 0
- covered.add( false ); // 1
- covered.add( true ); // 2
- covered.add( false ); // 3
- covered.add( true ); // 4
- covered.add( true ); // 5
- covered.add( false ); // 6
- covered.add( true ); // 7
- covered.add( true ); // 8
- if ( SurfacingUtil.isEngulfed( d0, covered ) ) {
- return false;
- }
- if ( SurfacingUtil.isEngulfed( d1, covered ) ) {
- return false;
- }
- if ( SurfacingUtil.isEngulfed( d2, covered ) ) {
- return false;
- }
- if ( !SurfacingUtil.isEngulfed( d3, covered ) ) {
- return false;
- }
- if ( SurfacingUtil.isEngulfed( d4, covered ) ) {
- return false;
- }
- if ( SurfacingUtil.isEngulfed( d5, covered ) ) {
- return false;
- }
- if ( !SurfacingUtil.isEngulfed( d6, covered ) ) {
- return false;
- }
- final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 );
- final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 );
- final Protein abc = new BasicProtein( "abc", "nemve", 0 );
- abc.addProteinDomain( a );
- abc.addProteinDomain( b );
- abc.addProteinDomain( c );
- final Protein abc_r1 = SurfacingUtil.removeOverlappingDomains( 3, false, abc );
- final Protein abc_r2 = SurfacingUtil.removeOverlappingDomains( 3, true, abc );
- if ( abc.getNumberOfProteinDomains() != 3 ) {
- return false;
- }
- if ( abc_r1.getNumberOfProteinDomains() != 3 ) {
- return false;
- }
- if ( abc_r2.getNumberOfProteinDomains() != 2 ) {
- return false;
- }
- if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) {
- return false;
- }
- if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) {
- return false;
- }
- final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 );
- final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 );
- final Protein def = new BasicProtein( "def", "nemve", 0 );
- def.addProteinDomain( d );
- def.addProteinDomain( e );
- def.addProteinDomain( f );
- final Protein def_r1 = SurfacingUtil.removeOverlappingDomains( 5, false, def );
- final Protein def_r2 = SurfacingUtil.removeOverlappingDomains( 5, true, def );
- if ( def.getNumberOfProteinDomains() != 3 ) {
- return false;
- }
- if ( def_r1.getNumberOfProteinDomains() != 3 ) {
- return false;
- }
- if ( def_r2.getNumberOfProteinDomains() != 3 ) {
- return false;
- }
- if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) {
- return false;
- }
- if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) {
- return false;
- }
- if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) {
- return false;
- }
- }
- catch ( final Exception e ) {
- e.printStackTrace( System.out );
- return false;
- }
- return true;
- }
-
private static boolean testGenomeWideCombinableDomains() {
try {
final Domain a = new BasicDomain( "a", 23, 25, ( short ) 1, ( short ) 4, 0.1, -12 );
return true;
}
- private static boolean testOverlapRemoval() {
- try {
- final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 );
- final List<Boolean> covered = new ArrayList<Boolean>();
- covered.add( true ); // 0
- covered.add( false ); // 1
- covered.add( true ); // 2
- covered.add( false ); // 3
- covered.add( true ); // 4
- covered.add( true ); // 5
- covered.add( false ); // 6
- covered.add( true ); // 7
- covered.add( true ); // 8
- if ( SurfacingUtil.calculateOverlap( d0, covered ) != 3 ) {
- return false;
- }
- if ( SurfacingUtil.calculateOverlap( d1, covered ) != 2 ) {
- return false;
- }
- if ( SurfacingUtil.calculateOverlap( d2, covered ) != 6 ) {
- return false;
- }
- if ( SurfacingUtil.calculateOverlap( d3, covered ) != 0 ) {
- return false;
- }
- if ( SurfacingUtil.calculateOverlap( d4, covered ) != 2 ) {
- return false;
- }
- final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.01, 1 );
- final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
- final Protein ab = new BasicProtein( "ab", "varanus", 0 );
- ab.addProteinDomain( a );
- ab.addProteinDomain( b );
- final Protein ab_s0 = SurfacingUtil.removeOverlappingDomains( 3, false, ab );
- if ( ab.getNumberOfProteinDomains() != 2 ) {
- return false;
- }
- if ( ab_s0.getNumberOfProteinDomains() != 1 ) {
- return false;
- }
- if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) {
- return false;
- }
- final Protein ab_s1 = SurfacingUtil.removeOverlappingDomains( 4, false, ab );
- if ( ab.getNumberOfProteinDomains() != 2 ) {
- return false;
- }
- if ( ab_s1.getNumberOfProteinDomains() != 2 ) {
- return false;
- }
- final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 );
- final Domain d = new BasicDomain( "d",
- ( short ) 10000,
- ( short ) 10500,
- ( short ) 1,
- ( short ) 1,
- 0.0000001,
- 1 );
- final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 );
- final Protein cde = new BasicProtein( "cde", "varanus", 0 );
- cde.addProteinDomain( c );
- cde.addProteinDomain( d );
- cde.addProteinDomain( e );
- final Protein cde_s0 = SurfacingUtil.removeOverlappingDomains( 0, false, cde );
- if ( cde.getNumberOfProteinDomains() != 3 ) {
- return false;
- }
- if ( cde_s0.getNumberOfProteinDomains() != 3 ) {
- return false;
- }
- final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 );
- final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 );
- final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 );
- final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 );
- final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 );
- final Protein fghi = new BasicProtein( "fghi", "varanus", 0 );
- fghi.addProteinDomain( f );
- fghi.addProteinDomain( g );
- fghi.addProteinDomain( h );
- fghi.addProteinDomain( i );
- fghi.addProteinDomain( i );
- fghi.addProteinDomain( i );
- fghi.addProteinDomain( i2 );
- final Protein fghi_s0 = SurfacingUtil.removeOverlappingDomains( 10, false, fghi );
- if ( fghi.getNumberOfProteinDomains() != 7 ) {
- return false;
- }
- if ( fghi_s0.getNumberOfProteinDomains() != 1 ) {
- return false;
- }
- if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) {
- return false;
- }
- final Protein fghi_s1 = SurfacingUtil.removeOverlappingDomains( 11, false, fghi );
- if ( fghi.getNumberOfProteinDomains() != 7 ) {
- return false;
- }
- if ( fghi_s1.getNumberOfProteinDomains() != 7 ) {
- return false;
- }
- final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 );
- final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 );
- final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 );
- final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 );
- final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 );
- final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 );
- final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 );
- final Protein jklm = new BasicProtein( "jklm", "varanus", 0 );
- jklm.addProteinDomain( j );
- jklm.addProteinDomain( k );
- jklm.addProteinDomain( l );
- jklm.addProteinDomain( m );
- jklm.addProteinDomain( m0 );
- jklm.addProteinDomain( m1 );
- jklm.addProteinDomain( m2 );
- final Protein jklm_s0 = SurfacingUtil.removeOverlappingDomains( 10, false, jklm );
- if ( jklm.getNumberOfProteinDomains() != 7 ) {
- return false;
- }
- if ( jklm_s0.getNumberOfProteinDomains() != 1 ) {
- return false;
- }
- if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) {
- return false;
- }
- final Protein jklm_s1 = SurfacingUtil.removeOverlappingDomains( 11, false, jklm );
- if ( jklm.getNumberOfProteinDomains() != 7 ) {
- return false;
- }
- if ( jklm_s1.getNumberOfProteinDomains() != 7 ) {
- return false;
- }
- final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 );
- final Protein od = new BasicProtein( "od", "varanus", 0 );
- od.addProteinDomain( only );
- final Protein od_s0 = SurfacingUtil.removeOverlappingDomains( 0, false, od );
- if ( od.getNumberOfProteinDomains() != 1 ) {
- return false;
- }
- if ( od_s0.getNumberOfProteinDomains() != 1 ) {
- return false;
- }
- }
- catch ( final Exception e ) {
- e.printStackTrace( System.out );
- return false;
- }
- return true;
- }
-
private static boolean testParsimony() {
try {
final BinaryStates X = BinaryStates.PRESENT;
+ ForesterConstants.PHYLO_XML_VERSION + "/"
+ ForesterConstants.PHYLO_XML_XSD;
+ public static boolean testOverlapRemoval() { // Exercises ForesterUtil.calculateOverlap and ForesterUtil.removeOverlappingDomains; returns true only if every expectation below holds.
+ try {
+ final Domain d0 = new BasicDomain( "d0", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.1, 1 );
+ final Domain d1 = new BasicDomain( "d1", ( short ) 7, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+ final Domain d2 = new BasicDomain( "d2", ( short ) 0, ( short ) 20, ( short ) 1, ( short ) 1, 0.1, 1 );
+ final Domain d3 = new BasicDomain( "d3", ( short ) 9, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+ final Domain d4 = new BasicDomain( "d4", ( short ) 7, ( short ) 8, ( short ) 1, ( short ) 1, 0.1, 1 );
+ final List<Boolean> covered = new ArrayList<Boolean>(); // coverage mask for positions 0-8 only; d1, d2 and d3 extend past its end
+ covered.add( true ); // 0
+ covered.add( false ); // 1
+ covered.add( true ); // 2
+ covered.add( false ); // 3
+ covered.add( true ); // 4
+ covered.add( true ); // 5
+ covered.add( false ); // 6
+ covered.add( true ); // 7
+ covered.add( true ); // 8
+ if ( ForesterUtil.calculateOverlap( d0, covered ) != 3 ) { // positions 2, 4 and 5 are marked true within 2-5
+ return false;
+ }
+ if ( ForesterUtil.calculateOverlap( d1, covered ) != 2 ) {
+ return false;
+ }
+ if ( ForesterUtil.calculateOverlap( d2, covered ) != 6 ) { // all six true entries of the mask
+ return false;
+ }
+ if ( ForesterUtil.calculateOverlap( d3, covered ) != 0 ) { // d3 lies entirely beyond the mask
+ return false;
+ }
+ if ( ForesterUtil.calculateOverlap( d4, covered ) != 2 ) {
+ return false;
+ }
+ final Domain a = new BasicDomain( "a", ( short ) 2, ( short ) 5, ( short ) 1, ( short ) 1, 0.01, 1 );
+ final Domain b = new BasicDomain( "b", ( short ) 2, ( short ) 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+ final Protein ab = new BasicProtein( "ab", "varanus", 0 );
+ ab.addProteinDomain( a );
+ ab.addProteinDomain( b );
+ final Protein ab_s0 = ForesterUtil.removeOverlappingDomains( 3, false, ab ); // expectation: with a maximal allowed overlap of 3 only "a" is kept
+ if ( ab.getNumberOfProteinDomains() != 2 ) { // the input protein itself is expected to stay unchanged
+ return false;
+ }
+ if ( ab_s0.getNumberOfProteinDomains() != 1 ) {
+ return false;
+ }
+ if ( !ab_s0.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) {
+ return false;
+ }
+ final Protein ab_s1 = ForesterUtil.removeOverlappingDomains( 4, false, ab ); // expectation: with a maximal allowed overlap of 4 both domains are kept
+ if ( ab.getNumberOfProteinDomains() != 2 ) {
+ return false;
+ }
+ if ( ab_s1.getNumberOfProteinDomains() != 2 ) {
+ return false;
+ }
+ final Domain c = new BasicDomain( "c", ( short ) 20000, ( short ) 20500, ( short ) 1, ( short ) 1, 10, 1 );
+ final Domain d = new BasicDomain( "d",
+ ( short ) 10000,
+ ( short ) 10500,
+ ( short ) 1,
+ ( short ) 1,
+ 0.0000001,
+ 1 );
+ final Domain e = new BasicDomain( "e", ( short ) 5000, ( short ) 5500, ( short ) 1, ( short ) 1, 0.0001, 1 );
+ final Protein cde = new BasicProtein( "cde", "varanus", 0 );
+ cde.addProteinDomain( c );
+ cde.addProteinDomain( d );
+ cde.addProteinDomain( e );
+ final Protein cde_s0 = ForesterUtil.removeOverlappingDomains( 0, false, cde ); // expectation: three domains in disjoint ranges all survive with a maximal allowed overlap of 0
+ if ( cde.getNumberOfProteinDomains() != 3 ) {
+ return false;
+ }
+ if ( cde_s0.getNumberOfProteinDomains() != 3 ) {
+ return false;
+ }
+ final Domain f = new BasicDomain( "f", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 );
+ final Domain g = new BasicDomain( "g", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 );
+ final Domain h = new BasicDomain( "h", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 );
+ final Domain i = new BasicDomain( "i", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.5, 1 );
+ final Domain i2 = new BasicDomain( "i", ( short ) 5, ( short ) 30, ( short ) 1, ( short ) 1, 0.5, 10 );
+ final Protein fghi = new BasicProtein( "fghi", "varanus", 0 );
+ fghi.addProteinDomain( f );
+ fghi.addProteinDomain( g );
+ fghi.addProteinDomain( h );
+ fghi.addProteinDomain( i );
+ fghi.addProteinDomain( i );
+ fghi.addProteinDomain( i );
+ fghi.addProteinDomain( i2 );
+ final Protein fghi_s0 = ForesterUtil.removeOverlappingDomains( 10, false, fghi ); // expectation: with a maximal allowed overlap of 10 only "h" is kept
+ if ( fghi.getNumberOfProteinDomains() != 7 ) {
+ return false;
+ }
+ if ( fghi_s0.getNumberOfProteinDomains() != 1 ) {
+ return false;
+ }
+ if ( !fghi_s0.getProteinDomain( 0 ).getDomainId().equals( "h" ) ) {
+ return false;
+ }
+ final Protein fghi_s1 = ForesterUtil.removeOverlappingDomains( 11, false, fghi ); // expectation: with a maximal allowed overlap of 11 all seven domains are kept
+ if ( fghi.getNumberOfProteinDomains() != 7 ) {
+ return false;
+ }
+ if ( fghi_s1.getNumberOfProteinDomains() != 7 ) {
+ return false;
+ }
+ final Domain j = new BasicDomain( "j", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 10, 1 );
+ final Domain k = new BasicDomain( "k", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.01, 1 );
+ final Domain l = new BasicDomain( "l", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 1, 0.0001, 1 );
+ final Domain m = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 1, ( short ) 4, 0.5, 1 );
+ final Domain m0 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 2, ( short ) 4, 0.5, 1 );
+ final Domain m1 = new BasicDomain( "m", ( short ) 10, ( short ) 20, ( short ) 3, ( short ) 4, 0.5, 1 );
+ final Domain m2 = new BasicDomain( "m", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 );
+ final Protein jklm = new BasicProtein( "jklm", "varanus", 0 );
+ jklm.addProteinDomain( j );
+ jklm.addProteinDomain( k );
+ jklm.addProteinDomain( l );
+ jklm.addProteinDomain( m );
+ jklm.addProteinDomain( m0 );
+ jklm.addProteinDomain( m1 );
+ jklm.addProteinDomain( m2 );
+ final Protein jklm_s0 = ForesterUtil.removeOverlappingDomains( 10, false, jklm ); // same scenario as fghi, but the "m" domains are four distinct objects; expectation: only "l" is kept
+ if ( jklm.getNumberOfProteinDomains() != 7 ) {
+ return false;
+ }
+ if ( jklm_s0.getNumberOfProteinDomains() != 1 ) {
+ return false;
+ }
+ if ( !jklm_s0.getProteinDomain( 0 ).getDomainId().equals( "l" ) ) {
+ return false;
+ }
+ final Protein jklm_s1 = ForesterUtil.removeOverlappingDomains( 11, false, jklm ); // expectation: with a maximal allowed overlap of 11 all seven domains are kept
+ if ( jklm.getNumberOfProteinDomains() != 7 ) {
+ return false;
+ }
+ if ( jklm_s1.getNumberOfProteinDomains() != 7 ) {
+ return false;
+ }
+ final Domain only = new BasicDomain( "only", ( short ) 5, ( short ) 30, ( short ) 4, ( short ) 4, 0.5, 10 );
+ final Protein od = new BasicProtein( "od", "varanus", 0 );
+ od.addProteinDomain( only );
+ final Protein od_s0 = ForesterUtil.removeOverlappingDomains( 0, false, od ); // expectation: a protein with a single domain keeps that domain
+ if ( od.getNumberOfProteinDomains() != 1 ) {
+ return false;
+ }
+ if ( od_s0.getNumberOfProteinDomains() != 1 ) {
+ return false;
+ }
+ }
+ catch ( final Exception e ) { // any exception is reported on System.out and counts as a failed test
+ e.printStackTrace( System.out );
+ return false;
+ }
+ return true;
+ }
+
+ /**
+  * Exercises {@link ForesterUtil#isEngulfed(Domain, List)} against a hand-built coverage
+  * map, and {@link ForesterUtil#removeOverlappingDomains(int, boolean, Protein)} with
+  * engulfed-domain removal switched off and on.
+  *
+  * @return true if every expectation holds; false on the first failed expectation or if
+  *         any exception is thrown (the stack trace is printed to System.out)
+  */
+ public static boolean testEngulfingOverlapRemoval() {
+     try {
+         // Each fixture carries its own id; the ids are not asserted on in the
+         // isEngulfed checks below, only the from/to coordinates matter there.
+         final Domain d0 = new BasicDomain( "d0", 0, 8, ( short ) 1, ( short ) 1, 0.1, 1 );
+         final Domain d1 = new BasicDomain( "d1", 0, 1, ( short ) 1, ( short ) 1, 0.1, 1 );
+         final Domain d2 = new BasicDomain( "d2", 0, 2, ( short ) 1, ( short ) 1, 0.1, 1 );
+         final Domain d3 = new BasicDomain( "d3", 7, 8, ( short ) 1, ( short ) 1, 0.1, 1 );
+         final Domain d4 = new BasicDomain( "d4", 7, 9, ( short ) 1, ( short ) 1, 0.1, 1 );
+         final Domain d5 = new BasicDomain( "d5", 0, 9, ( short ) 1, ( short ) 1, 0.1, 1 );
+         final Domain d6 = new BasicDomain( "d6", 4, 5, ( short ) 1, ( short ) 1, 0.1, 1 );
+         // Coverage map for positions 0..8; position 9 is deliberately absent.
+         final List<Boolean> covered = new ArrayList<Boolean>();
+         covered.add( true ); // 0
+         covered.add( false ); // 1
+         covered.add( true ); // 2
+         covered.add( false ); // 3
+         covered.add( true ); // 4
+         covered.add( true ); // 5
+         covered.add( false ); // 6
+         covered.add( true ); // 7
+         covered.add( true ); // 8
+         // 0-8 contains uncovered positions 1, 3 and 6.
+         if ( ForesterUtil.isEngulfed( d0, covered ) ) {
+             return false;
+         }
+         // 0-1: position 1 is uncovered.
+         if ( ForesterUtil.isEngulfed( d1, covered ) ) {
+             return false;
+         }
+         // 0-2: position 1 is uncovered.
+         if ( ForesterUtil.isEngulfed( d2, covered ) ) {
+             return false;
+         }
+         // 7-8: both positions are covered.
+         if ( !ForesterUtil.isEngulfed( d3, covered ) ) {
+             return false;
+         }
+         // 7-9: position 9 lies beyond the end of the coverage map.
+         if ( ForesterUtil.isEngulfed( d4, covered ) ) {
+             return false;
+         }
+         // 0-9: uncovered positions and position 9 beyond the map.
+         if ( ForesterUtil.isEngulfed( d5, covered ) ) {
+             return false;
+         }
+         // 4-5: both positions are covered.
+         if ( !ForesterUtil.isEngulfed( d6, covered ) ) {
+             return false;
+         }
+         // a (0-10, 0.1), b (8-20, 0.2), c (15-16, 0.3): b overlaps a on three positions,
+         // which is within the limit of 3; c lies entirely inside b.
+         final Domain a = new BasicDomain( "a", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+         final Domain b = new BasicDomain( "b", 8, 20, ( short ) 1, ( short ) 1, 0.2, 1 );
+         final Domain c = new BasicDomain( "c", 15, 16, ( short ) 1, ( short ) 1, 0.3, 1 );
+         final Protein abc = new BasicProtein( "abc", "nemve", 0 );
+         abc.addProteinDomain( a );
+         abc.addProteinDomain( b );
+         abc.addProteinDomain( c );
+         final Protein abc_r1 = ForesterUtil.removeOverlappingDomains( 3, false, abc );
+         final Protein abc_r2 = ForesterUtil.removeOverlappingDomains( 3, true, abc );
+         // The input protein must keep all of its domains.
+         if ( abc.getNumberOfProteinDomains() != 3 ) {
+             return false;
+         }
+         // Without engulfment removal nothing is dropped.
+         if ( abc_r1.getNumberOfProteinDomains() != 3 ) {
+             return false;
+         }
+         // With engulfment removal c is dropped, leaving a and b in that order.
+         if ( abc_r2.getNumberOfProteinDomains() != 2 ) {
+             return false;
+         }
+         if ( !abc_r2.getProteinDomain( 0 ).getDomainId().equals( "a" ) ) {
+             return false;
+         }
+         if ( !abc_r2.getProteinDomain( 1 ).getDomainId().equals( "b" ) ) {
+             return false;
+         }
+         // Same coordinates, but the small domain f (0.2) now ranks ahead of the large
+         // domain e (0.3), so f is placed before e and is not engulfed; e overlaps
+         // d and f on five positions, which is within the limit of 5.
+         final Domain d = new BasicDomain( "d", 0, 10, ( short ) 1, ( short ) 1, 0.1, 1 );
+         final Domain e = new BasicDomain( "e", 8, 20, ( short ) 1, ( short ) 1, 0.3, 1 );
+         final Domain f = new BasicDomain( "f", 15, 16, ( short ) 1, ( short ) 1, 0.2, 1 );
+         final Protein def = new BasicProtein( "def", "nemve", 0 );
+         def.addProteinDomain( d );
+         def.addProteinDomain( e );
+         def.addProteinDomain( f );
+         final Protein def_r1 = ForesterUtil.removeOverlappingDomains( 5, false, def );
+         final Protein def_r2 = ForesterUtil.removeOverlappingDomains( 5, true, def );
+         if ( def.getNumberOfProteinDomains() != 3 ) {
+             return false;
+         }
+         if ( def_r1.getNumberOfProteinDomains() != 3 ) {
+             return false;
+         }
+         if ( def_r2.getNumberOfProteinDomains() != 3 ) {
+             return false;
+         }
+         // Domains come back in processing order: d, f, e.
+         if ( !def_r2.getProteinDomain( 0 ).getDomainId().equals( "d" ) ) {
+             return false;
+         }
+         if ( !def_r2.getProteinDomain( 1 ).getDomainId().equals( "f" ) ) {
+             return false;
+         }
+         if ( !def_r2.getProteinDomain( 2 ).getDomainId().equals( "e" ) ) {
+             return false;
+         }
+     }
+     catch ( final Exception e ) {
+         e.printStackTrace( System.out );
+         return false;
+     }
+     return true;
+ }
+
/**
 * Tells whether two doubles are equal within the tolerance Test.ZERO_DIFF.
 *
 * @param a first value
 * @param b second value
 * @return true if the absolute difference of a and b is strictly smaller than
 *         Test.ZERO_DIFF
 */
public static boolean isEqual( final double a, final double b ) {
    final double difference = Math.abs( a - b );
    return difference < Test.ZERO_DIFF;
}
failed++;
}
}
- System.exit( 0 );
+ ///////////////////////////////////////// System.exit( 0 );
System.out.print( "UniProtKB id extraction: " );
if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) {
System.out.println( "OK." );
System.out.println( "failed." );
failed++;
}
+ //
+ // Print "OK." only on the success branch, as the neighbouring checks do, so that a
+ // failing test is not reported as "failed." immediately followed by "OK.".
+ System.out.print( "Overlap removal: " );
+ if ( org.forester.test.Test.testOverlapRemoval() ) {
+     System.out.println( "OK." );
+     succeeded++;
+ }
+ else {
+     System.out.println( "failed." );
+     failed++;
+ }
+ System.out.print( "Engulfing overlap removal: " );
+ if ( Test.testEngulfingOverlapRemoval() ) {
+     System.out.println( "OK." );
+     succeeded++;
+ }
+ else {
+     System.out.println( "failed." );
+     failed++;
+ }
+ //
System.out.print( "Taxonomy code extraction: " );
if ( Test.testExtractTaxonomyCodeFromNodeName() ) {
System.out.println( "OK." );
System.out.println( entry4.getGeneName() );
return false;
}
- if ( !entry4.getChromosome().equals( "ras" ) ) {
- System.out.println( entry4.getChromosome() );
- return false;
- }
- if ( !entry4.getMap().equals( "ras" ) ) {
- System.out.println( entry4.getMap() );
- return false;
- }
+ // if ( !entry4.getChromosome().equals( "ras" ) ) {
+ // System.out.println( entry4.getChromosome() );
+ // return false;
+ // }
+ // if ( !entry4.getMap().equals( "ras" ) ) {
+ // System.out.println( entry4.getMap() );
+ // return false;
+ // }
//
//TODO fails:
// final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" );
import org.forester.phylogeny.data.Distribution;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
+import org.forester.protein.BasicProtein;
+import org.forester.protein.Domain;
+import org.forester.protein.Protein;
+import org.forester.surfacing.SurfacingUtil;
public final class ForesterUtil {
public static final String NCBI_NUCCORE = "http://www.ncbi.nlm.nih.gov/nuccore/";
public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/";
public static final String NCBI_GI = "http://www.ncbi.nlm.nih.gov/protein/gi:";
+ public final static Color DEUTEROSTOMIA_COLOR = new Color( 255, 0, 0 );
+ public final static Color PROTOSTOMIA_COLOR = new Color( 204, 0, 0 );
+ public final static Color METAZOA_COLOR = new Color( 204, 0, 102 );
+ public final static Color HOLOZOA_COLOR = new Color( 127, 0, 255 );
+ public final static Color FUNGI_COLOR = new Color( 255, 128, 0 );
+ public final static Color HOLOMYCOTA_COLOR = new Color( 204, 102, 0 );
+ public final static Color AMOEBOZOA_COLOR = new Color( 255, 0, 255 );
+ public final static Color VIRIDPLANTAE_COLOR = new Color( 0, 255, 0 );
+ public final static Color RHODOPHYTA_COLOR = new Color( 0, 153, 76 );
+ public final static Color HACROBIA_COLOR = new Color( 0, 102, 51 );
+ public final static Color STRAMENOPILES_COLOR = new Color( 0, 0, 255 );
+ public final static Color ALVEOLATA_COLOR = new Color( 0, 128, 255 );
+ public final static Color RHIZARIA_COLOR = new Color( 0, 255, 255 );
+ public final static Color EXCAVATA_COLOR = new Color( 204, 204, 0 );
+ public final static Color ARCHAEA_COLOR = new Color( 160, 160, 160 );
+ public final static Color BACTERIA_COLOR = new Color( 64, 64, 64 );
static {
final DecimalFormatSymbols dfs = new DecimalFormatSymbols();
dfs.setDecimalSeparator( '.' );
private ForesterUtil() {
}
+ /**
+  * Counts how many positions of a domain are already flagged as covered.
+  * <p>
+  * The positions from domain.getFrom() to domain.getTo() (both inclusive) are examined;
+  * positions at or beyond the end of covered_positions count as not covered.
+  *
+  * @param domain
+  *            the domain whose span is examined
+  * @param covered_positions
+  *            coverage flags, indexed by position
+  * @return the number of positions within the span that are flagged true
+  */
+ public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
+     final int first = domain.getFrom();
+     // Nothing past the end of the list can be covered, so stop there.
+     final int last = Math.min( domain.getTo(), covered_positions.size() - 1 );
+     int hits = 0;
+     for( int pos = first; pos <= last; ++pos ) {
+         if ( covered_positions.get( pos ) ) {
+             ++hits;
+         }
+     }
+     return hits;
+ }
+
final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) {
if ( sb.length() > 0 ) {
sb.append( separator );
}
/**
+  * Builds a new protein containing only those domains of the given protein that do
+  * not overlap too much with domains placed before them.
+  * <p>
+  * Domains are visited in the order returned by
+  * SurfacingUtil.sortDomainsWithAscendingConfidenceValues. A domain is kept if its
+  * overlap with the positions covered by previously kept domains does not exceed
+  * max_allowed_overlap and, when remove_engulfed_domains is set, it is not completely
+  * engulfed by those covered positions. Kept domains are added to a newly created
+  * protein carrying the same id, species id and length as the input.
+  * <p>
+  * Examples regarding engulfment (coordinates and confidence values):
+  * <ul>
+  * <li>0-10 (0.1), 8-20 (0.2), 15-16 (0.3): the domain with 0.3 is ignored, because it
+  * lies entirely inside the 0.2 domain, which is placed first.</li>
+  * <li>0-10 (0.1), 8-20 (0.3), 15-16 (0.2): the domains with 0.3 and 0.2 are
+  * <em>not</em> ignored, because the small 0.2 domain is placed before the large 0.3
+  * domain.</li>
+  * </ul>
+  *
+  * @param max_allowed_overlap
+  *            maximal number of overlapping positions (inclusive) for a domain to be
+  *            still considered not overlapping; zero means no overlap is tolerated,
+  *            a negative value disables the overlap check (any overlap is allowed)
+  * @param remove_engulfed_domains
+  *            to remove domains which are completely engulfed by coverage of
+  *            domains with better support
+  * @param protein
+  *            the protein whose domains are to be filtered
+  * @return a new protein holding the retained domains, in the order they were visited
+  */
+ public static Protein removeOverlappingDomains( final int max_allowed_overlap,
+                                                 final boolean remove_engulfed_domains,
+                                                 final Protein protein ) {
+     final Protein pruned_protein = new BasicProtein( protein.getProteinId().getId(), protein.getSpecies()
+             .getSpeciesId(), protein.getLength() );
+     final List<Domain> sorted = SurfacingUtil.sortDomainsWithAscendingConfidenceValues( protein );
+     final List<Boolean> covered_positions = new ArrayList<Boolean>();
+     for( final Domain domain : sorted ) {
+         // A negative limit switches the overlap check off entirely.
+         final boolean overlap_acceptable = ( max_allowed_overlap < 0 )
+                 || ( ForesterUtil.calculateOverlap( domain, covered_positions ) <= max_allowed_overlap );
+         if ( !overlap_acceptable ) {
+             continue;
+         }
+         if ( remove_engulfed_domains && isEngulfed( domain, covered_positions ) ) {
+             continue;
+         }
+         // Pad the coverage map with "not covered" up to the start of this domain.
+         for( int i = covered_positions.size(); i < domain.getFrom(); ++i ) {
+             covered_positions.add( false );
+         }
+         // Flag the span of this domain as covered, growing the map where needed.
+         final int padded_size = covered_positions.size();
+         for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
+             if ( i < padded_size ) {
+                 covered_positions.set( i, true );
+             }
+             else {
+                 covered_positions.add( true );
+             }
+         }
+         pruned_protein.addProteinDomain( domain );
+     }
+     return pruned_protein;
+ }
+
+ /**
+  * Returns true if every position of the given domain, from domain.getFrom() to
+  * domain.getTo() (both inclusive), is flagged as covered. A position at or beyond
+  * the end of covered_positions counts as not covered.
+  *
+  * @param domain
+  *            the domain whose span is examined
+  * @param covered_positions
+  *            coverage flags, indexed by position
+  * @return true if the whole span of the domain is covered, false otherwise
+  */
+ public static boolean isEngulfed( final Domain domain, final List<Boolean> covered_positions ) {
+     final int known_positions = covered_positions.size();
+     for( int pos = domain.getFrom(); pos <= domain.getTo(); ++pos ) {
+         final boolean is_covered = ( pos < known_positions ) && covered_positions.get( pos );
+         if ( !is_covered ) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
* This calculates a color. If value is equal to min the returned color is
* minColor, if value is equal to max the returned color is maxColor,
* otherwise a color 'proportional' to value is returned.
import java.net.URL;
import java.net.URLEncoder;
+
public class Test {
public static void main( final String[] args ) {