import org.forester.protein.Domain;
import org.forester.protein.Protein;
import org.forester.species.Species;
+import org.forester.surfacing.DomainSimilarity.PRINT_OPTION;
import org.forester.surfacing.DomainSimilarityCalculator.Detailedness;
import org.forester.surfacing.GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder;
-import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
import org.forester.util.AsciiHistogram;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.BasicTable;
public final static Pattern PATTERN_SP_STYLE_TAXONOMY = Pattern.compile( "^[A-Z0-9]{3,5}$" );
private final static Map<String, String> _TAXCODE_HEXCOLORSTRING_MAP = new HashMap<String, String>();
-
-
- private final static Map<String, String> _TAXCODE_TAXGROUP_MAP = new HashMap<String, String>();
-
-
+ private final static Map<String, String> _TAXCODE_TAXGROUP_MAP = new HashMap<String, String>();
private static final Comparator<Domain> ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator<Domain>() {
@Override
public int compare( final Domain d1,
final Domain d2 ) {
- if ( d1.getPerSequenceEvalue() < d2
- .getPerSequenceEvalue() ) {
+ if ( d1.getPerDomainEvalue() < d2
+ .getPerDomainEvalue() ) {
return -1;
}
- else if ( d1
- .getPerSequenceEvalue() > d2
- .getPerSequenceEvalue() ) {
+ else if ( d1.getPerDomainEvalue() > d2
+ .getPerDomainEvalue() ) {
return 1;
}
else {
}
}
- public static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
+ public static void checkWriteabilityForPairwiseComparisons( final DomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
final String automated_pairwise_comparison_suffix,
final File outdir ) {
|| ( !get_gains && ( matrix.getState( id, c ) == CharacterStateMatrix.GainLossStates.LOSS ) ) ) {
if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED_ADJACTANT ) {
all_binary_domains_combination_gained.add( AdjactantDirectedBinaryDomainCombination
- .createInstance( matrix.getCharacter( c ) ) );
+ .obtainInstance( matrix.getCharacter( c ) ) );
}
else if ( dc_type == BinaryDomainCombination.DomainCombinationType.DIRECTED ) {
all_binary_domains_combination_gained.add( DirectedBinaryDomainCombination
- .createInstance( matrix.getCharacter( c ) ) );
+ .obtainInstance( matrix.getCharacter( c ) ) );
}
else {
- all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.createInstance( matrix
+ all_binary_domains_combination_gained.add( BasicBinaryDomainCombination.obtainInstance( matrix
.getCharacter( c ) ) );
}
}
}
public static StringBuilder createParametersAsString( final boolean ignore_dufs,
- final double e_value_max,
+ final double ie_value_max,
+ final double fs_e_value_max,
final int max_allowed_overlap,
final boolean no_engulfing_overlaps,
final File cutoff_scores_file,
final BinaryDomainCombination.DomainCombinationType dc_type ) {
final StringBuilder parameters_sb = new StringBuilder();
- parameters_sb.append( "E-value: " + e_value_max );
+ parameters_sb.append( "iE-value: " + ie_value_max );
+ parameters_sb.append( ", FS E-value: " + fs_e_value_max );
if ( cutoff_scores_file != null ) {
parameters_sb.append( ", Cutoff-scores-file: " + cutoff_scores_file );
}
public static void decoratePrintableDomainSimilarities( final SortedSet<DomainSimilarity> domain_similarities,
final Detailedness detailedness ) {
for( final DomainSimilarity domain_similarity : domain_similarities ) {
- if ( domain_similarity instanceof PrintableDomainSimilarity ) {
- final PrintableDomainSimilarity printable_domain_similarity = ( PrintableDomainSimilarity ) domain_similarity;
+ if ( domain_similarity instanceof DomainSimilarity ) {
+ final DomainSimilarity printable_domain_similarity = domain_similarity;
printable_domain_similarity.setDetailedness( detailedness );
}
}
*
* @param all_binary_domains_combination_lost_fitch
* @param use_last_in_fitch_parsimony
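+ * @param perform_dc_fich if true, Fitch parsimony is performed on binary domain combinations (provided at least one combination exists); if false, that step is skipped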
* @param consider_directedness_and_adjacency_for_bin_combinations
* @param all_binary_domains_combination_gained if null ignored, otherwise this is to list all binary domain combinations
* which were gained under unweighted (Fitch) parsimony.
final Map<String, DescriptiveStatistics> domain_length_stats_by_domain,
final Map<String, Integer> tax_code_to_id_map,
final boolean write_to_nexus,
- final boolean use_last_in_fitch_parsimony ) {
+ final boolean use_last_in_fitch_parsimony,
+ final boolean perform_dc_fich ) {
final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
final String date_time = ForesterUtil.getCurrentDateTime();
final SortedSet<String> all_pfams_encountered = new TreeSet<String>();
e.printStackTrace();
ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
}
- if ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) {
+ if ( perform_dc_fich && ( domain_parsimony.calculateNumberOfBinaryDomainCombination() > 0 ) ) {
// FITCH DOMAIN COMBINATIONS
// -------------------------
local_phylogeny_l = phylogeny.copy();
throws IllegalArgumentException {
if ( !_TAXCODE_HEXCOLORSTRING_MAP.containsKey( tax_code ) ) {
if ( ( phy != null ) && !phy.isEmpty() ) {
- final List<PhylogenyNode> nodes = phy.getNodesViaTaxonomyCode( tax_code );
- Color c = null;
- if ( ( nodes == null ) || nodes.isEmpty() ) {
- throw new IllegalArgumentException( "code " + tax_code + " is not found" );
- }
- if ( nodes.size() != 1 ) {
- throw new IllegalArgumentException( "code " + tax_code + " is not unique" );
- }
- PhylogenyNode n = nodes.get( 0 );
- while ( n != null ) {
- if ( n.getNodeData().isHasTaxonomy()
- && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
- c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy()
- .getScientificName(), tax_code );
- }
- if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) {
- c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code );
- }
- if ( c != null ) {
- break;
- }
- n = n.getParent();
- }
+ // final List<PhylogenyNode> nodes = phy.getNodesViaTaxonomyCode( tax_code );
+ // Color c = null;
+ // if ( ( nodes == null ) || nodes.isEmpty() ) {
+ // throw new IllegalArgumentException( "code " + tax_code + " is not found" );
+ // }
+ // if ( nodes.size() != 1 ) {
+ // throw new IllegalArgumentException( "code " + tax_code + " is not unique" );
+ // }
+ // PhylogenyNode n = nodes.get( 0 );
+ // while ( n != null ) {
+ // if ( n.getNodeData().isHasTaxonomy()
+ // && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
+ // c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy()
+ // .getScientificName(), tax_code );
+ // }
+ // if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) {
+ // c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code );
+ // }
+ // if ( c != null ) {
+ // break;
+ // }
+ // n = n.getParent();
+ // }
+ final String group = obtainTaxonomyGroup( tax_code, phy );
+ final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group );
if ( c == null ) {
- throw new IllegalArgumentException( "no color found for taxonomy code \"" + tax_code + "\"" );
+ throw new IllegalArgumentException( "no color found for taxonomy group \"" + group
+ + "\" for code \"" + tax_code + "\"" );
}
final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() );
_TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex );
}
return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code );
}
-
-
- public static String obtainTaxonomyGroup( final String tax_code, final Phylogeny phy )
+
+ public static String obtainTaxonomyGroup( final String tax_code, final Phylogeny species_tree )
throws IllegalArgumentException {
if ( !_TAXCODE_TAXGROUP_MAP.containsKey( tax_code ) ) {
- if ( ( phy != null ) && !phy.isEmpty() ) {
- final List<PhylogenyNode> nodes = phy.getNodesViaTaxonomyCode( tax_code );
-
+ if ( ( species_tree != null ) && !species_tree.isEmpty() ) {
+ final List<PhylogenyNode> nodes = species_tree.getNodesViaTaxonomyCode( tax_code );
if ( ( nodes == null ) || nodes.isEmpty() ) {
throw new IllegalArgumentException( "code " + tax_code + " is not found" );
}
}
PhylogenyNode n = nodes.get( 0 );
String group = null;
- Color c = null;
while ( n != null ) {
if ( n.getNodeData().isHasTaxonomy()
&& !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
- c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy()
- .getScientificName(), tax_code );
-
- group = n.getNodeData().getTaxonomy()
- .getScientificName();
+ group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getNodeData().getTaxonomy()
+ .getScientificName() );
}
- if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) {
- c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code );
- group = n.getName();
+ if ( ForesterUtil.isEmpty( group ) && !ForesterUtil.isEmpty( n.getName() ) ) {
+ group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getName() );
}
- if ( c != null ) {
+ if ( !ForesterUtil.isEmpty( group ) ) {
break;
}
- group = null;
n = n.getParent();
}
- if ( c == null ) {
+ if ( ForesterUtil.isEmpty( group ) ) {
throw new IllegalArgumentException( "no group found for taxonomy code \"" + tax_code + "\"" );
}
-
_TAXCODE_TAXGROUP_MAP.put( tax_code, group );
}
else {
}
return _TAXCODE_TAXGROUP_MAP.get( tax_code );
}
-
-
-
public static void performDomainArchitectureAnalysis( final SortedMap<String, Set<String>> domain_architecutures,
final SortedMap<String, Integer> domain_architecuture_counts,
catch ( final IOException e ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
}
- ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
- + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
- + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
+ if ( input_file_properties[ i ].length == 3 ) {
+ ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
+ + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ", "
+ + input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
+ }
+ else {
+ ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote binary domain combination for \""
+ + input_file_properties[ i ][ 0 ] + "\" (" + input_file_properties[ i ][ 1 ] + ") to: \""
+ + dc_outfile_dot + "\"" );
+ }
}
public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
.getState( id, c ) == CharacterStateMatrix.GainLossStates.UNCHANGED_PRESENT ) ) ) ) {
BinaryDomainCombination bdc = null;
try {
- bdc = BasicBinaryDomainCombination.createInstance( matrix.getCharacter( c ) );
+ bdc = BasicBinaryDomainCombination.obtainInstance( matrix.getCharacter( c ) );
}
catch ( final Exception e ) {
ForesterUtil.fatalError( surfacing.PRG_NAME, e.getLocalizedMessage() );
final SortedSet<DomainSimilarity> similarities,
final boolean treat_as_binary,
final List<Species> species_order,
- final PrintableDomainSimilarity.PRINT_OPTION print_option,
+ final DomainSimilarity.PRINT_OPTION print_option,
final DomainSimilarity.DomainSimilarityScoring scoring,
final boolean verbose,
final Map<String, Integer> tax_code_to_id_map,
- final Phylogeny phy ) throws IOException {
+ final Phylogeny phy,
+ final Set<String> pos_filter_doms ) throws IOException {
if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
split_writers = new HashMap<Character, Writer>();
split_writers.put( '_', single_writer );
//
for( final DomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
- ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+ ( similarity ).setSpeciesOrder( species_order );
}
if ( single_writer != null ) {
- single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
- + similarity.getDomainId() + "</a></b></td></tr>" );
+ if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) {
+ single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId()
+ + "\"><span style=\"color:#00ff00\">" + similarity.getDomainId()
+ + "</span></a></b></td></tr>" );
+ }
+ else {
+ single_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+ + similarity.getDomainId() + "</a></b></td></tr>" );
+ }
single_writer.write( SurfacingConstants.NL );
}
else {
if ( local_writer == null ) {
local_writer = split_writers.get( '0' );
}
- local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
- + similarity.getDomainId() + "</a></b></td></tr>" );
+ if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) {
+ local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId()
+ + "\"><span style=\"color:#00ff00\">" + similarity.getDomainId()
+ + "</span></a></b></td></tr>" );
+ }
+ else {
+ local_writer.write( "<tr><td><b><a href=\"#" + similarity.getDomainId() + "\">"
+ + similarity.getDomainId() + "</a></b></td></tr>" );
+ }
local_writer.write( SurfacingConstants.NL );
}
}
//
for( final DomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
- ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
+ ( similarity ).setSpeciesOrder( species_order );
}
if ( simple_tab_writer != null ) {
simple_tab_writer.write( similarity.toStringBuffer( PRINT_OPTION.SIMPLE_TAB_DELIMITED,
public static void writeProteinListsForAllSpecies( final File output_dir,
final SortedMap<Species, List<Protein>> protein_lists_per_species,
final List<GenomeWideCombinableDomains> gwcd_list,
- final double domain_e_cutoff ) {
+ final double domain_e_cutoff,
+ final Set<String> pos_filter_doms ) {
final SortedSet<String> all_domains = new TreeSet<String>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
all_domains.addAll( gwcd.getAllDomainIds() );
}
for( final String domain : all_domains ) {
+ if ( !ForesterUtil.isEmpty( pos_filter_doms ) && !pos_filter_doms.contains( domain ) ) {
+ continue;
+ }
final File out = new File( output_dir + ForesterUtil.FILE_SEPARATOR + domain + surfacing.SEQ_EXTRACT_SUFFIX );
checkForOutputFileWriteability( out );
try {