import org.forester.evoinference.distance.NeighborJoining;
import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
-import org.forester.evoinference.matrix.distance.DistanceMatrix;
+import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.go.GoId;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.protein.BinaryDomainCombination;
+import org.forester.protein.Domain;
+import org.forester.protein.DomainId;
+import org.forester.protein.Protein;
+import org.forester.species.BasicSpecies;
+import org.forester.species.Species;
import org.forester.surfacing.BasicDomainSimilarityCalculator;
import org.forester.surfacing.BasicGenomeWideCombinableDomains;
-import org.forester.surfacing.BasicSpecies;
-import org.forester.surfacing.BinaryDomainCombination;
import org.forester.surfacing.CombinationsBasedPairwiseDomainSimilarityCalculator;
import org.forester.surfacing.DomainCountsBasedPairwiseSimilarityCalculator;
import org.forester.surfacing.DomainCountsDifferenceUtil;
-import org.forester.surfacing.DomainId;
import org.forester.surfacing.DomainLengthsTable;
import org.forester.surfacing.DomainParsimonyCalculator;
import org.forester.surfacing.DomainSimilarity;
import org.forester.surfacing.PairwiseGenomeComparator;
import org.forester.surfacing.PrintableDomainSimilarity;
import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
-import org.forester.surfacing.Protein;
import org.forester.surfacing.ProteinCountsBasedPairwiseDomainSimilarityCalculator;
-import org.forester.surfacing.Species;
import org.forester.surfacing.SurfacingUtil;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.BasicTable;
// Option-name strings and the separator character for input values.
// NOTE(review): presumably these are command-line option keys; their use is not visible in this excerpt -- confirm.
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
final static private char SEPARATOR_FOR_INPUT_VALUES = '#';
// Program identification strings: version, date, contact e-mail and web address.
- final static private String PRG_VERSION = "2.230";
- final static private String PRG_DATE = "2012.04.22";
+ final static private String PRG_VERSION = "2.250";
+ final static private String PRG_DATE = "2012.05.07";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "www.phylosoft.org/forester/applications/surfacing";
// NOTE(review): by its name, the default for ignoring DUFs; where it is read is not visible here -- confirm.
final static private boolean IGNORE_DUFS_DEFAULT = true;
// File-name suffixes (public) for the "MAPPED" independent domain-combination gains (Fitch parsimony) outputs.
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
// When true, the protein_length_stats_by_dc and domain_number_stats_by_dc maps are
// allocated as HashMaps before the main genome loop; when false they remain null.
+ private static final boolean PERFORM_DC_REGAIN_PROTEINS_STATS = true;
private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
* @param sum_of_all_domains_encountered
* @param all_bin_domain_combinations_encountered
* @param is_gains_analysis
+ * @param protein_length_stats_by_dc
* @throws IOException
*/
private static void executeFitchGainsAnalysis( final File output_file,
return intrees;
}
- private static List<Phylogeny> inferSpeciesTrees( final File outfile, final List<DistanceMatrix> distances_list ) {
+ private static List<Phylogeny> inferSpeciesTrees( final File outfile,
+ final List<BasicSymmetricalDistanceMatrix> distances_list ) {
final NeighborJoining nj = NeighborJoining.createInstance();
final List<Phylogeny> phylogenies = nj.execute( distances_list );
final PhylogenyWriter w = new PhylogenyWriter();
System.out.println( "Ignore combination with self: " + ignore_combination_with_same );
html_desc.append( "<tr><td>Ignore combination with self for domain combination similarity analyses:</td><td>"
+ ignore_combination_with_same + "</td></tr>" + nl );
- ;
System.out.println( "Consider directedness : "
+ ( dc_type != BinaryDomainCombination.DomainCombinationType.BASIC ) );
html_desc.append( "<tr><td>Consider directedness of binary domain combinations:</td><td>"
catch ( final IOException e3 ) {
e3.printStackTrace();
}
+ Map<String, DescriptiveStatistics> protein_length_stats_by_dc = null;
+ Map<String, DescriptiveStatistics> domain_number_stats_by_dc = null;
+ final Map<String, DescriptiveStatistics> domain_length_stats_by_domain = new HashMap<String, DescriptiveStatistics>();
+ if ( PERFORM_DC_REGAIN_PROTEINS_STATS ) {
+ protein_length_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+ domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+ }
// Main loop:
for( int i = 0; i < number_of_genomes; ++i ) {
System.out.println();
dc_data_writer.write( SurfacingUtil.proteinToDomainCombinations( protein, count + "", "\t" )
.toString() );
++count;
+ for( final Domain d : protein.getProteinDomains() ) {
+ final String d_str = d.getDomainId().toString();
+ if ( !domain_length_stats_by_domain.containsKey( d_str ) ) {
+ domain_length_stats_by_domain.put( d_str, new BasicDescriptiveStatistics() );
+ }
+ domain_length_stats_by_domain.get( d_str ).addValue( d.getLength() );
+ }
}
}
catch ( final IOException e ) {
ignore_combination_with_same,
new BasicSpecies( input_file_properties[ i ][ 1 ] ),
domain_id_to_go_ids_map,
- dc_type ) );
+ dc_type,
+ protein_length_stats_by_dc,
+ domain_number_stats_by_dc ) );
domain_lengths_table.addLengths( protein_list );
if ( gwcd_list.get( i ).getSize() > 0 ) {
SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
output_binary_domain_combinationsfor_graph_analysis,
all_bin_domain_combinations_gained_fitch,
all_bin_domain_combinations_lost_fitch,
- dc_type );
+ dc_type,
+ protein_length_stats_by_dc,
+ domain_number_stats_by_dc,
+ domain_length_stats_by_domain );
// Listing of all domain combinations gained is only done if only one input tree is used.
if ( ( domain_id_to_secondary_features_maps != null )
&& ( domain_id_to_secondary_features_maps.length > 0 ) ) {