import org.forester.evoinference.distance.NeighborJoining;
import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
-import org.forester.evoinference.matrix.distance.DistanceMatrix;
+import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.go.GoId;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.protein.BinaryDomainCombination;
+import org.forester.protein.Domain;
+import org.forester.protein.DomainId;
+import org.forester.protein.Protein;
+import org.forester.species.BasicSpecies;
+import org.forester.species.Species;
import org.forester.surfacing.BasicDomainSimilarityCalculator;
import org.forester.surfacing.BasicGenomeWideCombinableDomains;
-import org.forester.surfacing.BasicSpecies;
-import org.forester.surfacing.BinaryDomainCombination;
import org.forester.surfacing.CombinationsBasedPairwiseDomainSimilarityCalculator;
import org.forester.surfacing.DomainCountsBasedPairwiseSimilarityCalculator;
import org.forester.surfacing.DomainCountsDifferenceUtil;
-import org.forester.surfacing.DomainId;
import org.forester.surfacing.DomainLengthsTable;
import org.forester.surfacing.DomainParsimonyCalculator;
import org.forester.surfacing.DomainSimilarity;
import org.forester.surfacing.PairwiseGenomeComparator;
import org.forester.surfacing.PrintableDomainSimilarity;
import org.forester.surfacing.PrintableDomainSimilarity.PRINT_OPTION;
-import org.forester.surfacing.Protein;
import org.forester.surfacing.ProteinCountsBasedPairwiseDomainSimilarityCalculator;
-import org.forester.surfacing.Species;
import org.forester.surfacing.SurfacingUtil;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.BasicTable;
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
final static private char SEPARATOR_FOR_INPUT_VALUES = '#';
- final static private String PRG_VERSION = "2.230";
- final static private String PRG_DATE = "2012.04.22";
+ final static private String PRG_VERSION = "2.250";
+ final static private String PRG_DATE = "2012.05.07";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "www.phylosoft.org/forester/applications/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt";
+ private static final boolean PERFORM_DC_REGAIN_PROTEINS_STATS = true;
private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
* @param sum_of_all_domains_encountered
* @param all_bin_domain_combinations_encountered
* @param is_gains_analysis
+ * @param protein_length_stats_by_dc
* @throws IOException
*/
private static void executeFitchGainsAnalysis( final File output_file,
return intrees;
}
- private static List<Phylogeny> inferSpeciesTrees( final File outfile, final List<DistanceMatrix> distances_list ) {
+ private static List<Phylogeny> inferSpeciesTrees( final File outfile,
+ final List<BasicSymmetricalDistanceMatrix> distances_list ) {
final NeighborJoining nj = NeighborJoining.createInstance();
final List<Phylogeny> phylogenies = nj.execute( distances_list );
final PhylogenyWriter w = new PhylogenyWriter();
System.out.println( "Ignore combination with self: " + ignore_combination_with_same );
html_desc.append( "<tr><td>Ignore combination with self for domain combination similarity analyses:</td><td>"
+ ignore_combination_with_same + "</td></tr>" + nl );
- ;
System.out.println( "Consider directedness : "
+ ( dc_type != BinaryDomainCombination.DomainCombinationType.BASIC ) );
html_desc.append( "<tr><td>Consider directedness of binary domain combinations:</td><td>"
catch ( final IOException e3 ) {
e3.printStackTrace();
}
+ Map<String, DescriptiveStatistics> protein_length_stats_by_dc = null;
+ Map<String, DescriptiveStatistics> domain_number_stats_by_dc = null;
+ final Map<String, DescriptiveStatistics> domain_length_stats_by_domain = new HashMap<String, DescriptiveStatistics>();
+ if ( PERFORM_DC_REGAIN_PROTEINS_STATS ) {
+ protein_length_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+ domain_number_stats_by_dc = new HashMap<String, DescriptiveStatistics>();
+ }
// Main loop:
for( int i = 0; i < number_of_genomes; ++i ) {
System.out.println();
dc_data_writer.write( SurfacingUtil.proteinToDomainCombinations( protein, count + "", "\t" )
.toString() );
++count;
+ for( final Domain d : protein.getProteinDomains() ) {
+ final String d_str = d.getDomainId().toString();
+ if ( !domain_length_stats_by_domain.containsKey( d_str ) ) {
+ domain_length_stats_by_domain.put( d_str, new BasicDescriptiveStatistics() );
+ }
+ domain_length_stats_by_domain.get( d_str ).addValue( d.getLength() );
+ }
}
}
catch ( final IOException e ) {
ignore_combination_with_same,
new BasicSpecies( input_file_properties[ i ][ 1 ] ),
domain_id_to_go_ids_map,
- dc_type ) );
+ dc_type,
+ protein_length_stats_by_dc,
+ domain_number_stats_by_dc ) );
domain_lengths_table.addLengths( protein_list );
if ( gwcd_list.get( i ).getSize() > 0 ) {
SurfacingUtil.writeDomainCombinationsCountsFile( input_file_properties,
output_binary_domain_combinationsfor_graph_analysis,
all_bin_domain_combinations_gained_fitch,
all_bin_domain_combinations_lost_fitch,
- dc_type );
+ dc_type,
+ protein_length_stats_by_dc,
+ domain_number_stats_by_dc,
+ domain_length_stats_by_domain );
// Listing of all domain combinations gained is only done if only one input tree is used.
if ( ( domain_id_to_secondary_features_maps != null )
&& ( domain_id_to_secondary_features_maps.length > 0 ) ) {
+ ": to output binary domain combinations for (downstream) graph analysis" );
System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" );
System.out.println();
+ System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
+ + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
+ + " -no_eo -mo=0 -input=genomes_limited.txt -out_dir=out -o=o "
+ + " -species_tree=tol.xml -obo=gene_ontology_2012_02_07.obo -pos_filter=f.txt -all_prot" );
System.out.println();
- System.out.println( "Example: java -Xms128m -Xmx512m -cp path/to/forester.jar"
+ System.out.println( "Example 2: java -Xms128m -Xmx512m -cp path/to/forester.jar"
+ " org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST"
+ " -cos=Pfam_ls_22_TC2 -p2g=pfam2go -obo=gene_ontology_edit.obo "
+ "-dc_sort=dom -ignore_with_self -no_singles -e=0.001 -mo=1 -no_eo "
- + "-ds_output=detailed_html -scoring=domains -sort=alpha -" + JACKNIFE_OPTION
- + "=50 human mouse brafl strpu" );
+ + "-ds_output=detailed_html -scoring=domains -sort=alpha human mouse brafl strpu" );
System.out.println();
}