import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.PhylogenyNodeI.NH_CONVERSION_SUPPORT_VALUE_STYLE;
import org.forester.phylogeny.data.BinaryCharacters;
import org.forester.phylogeny.data.Confidence;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
private static void calculateIndependentDomainCombinationGains( final Phylogeny local_phylogeny_l,
final String outfilename_for_counts,
final String outfilename_for_dc,
- final String outfilename_for_dc_for_go_mapping ) {
+ final String outfilename_for_dc_for_go_mapping,
+ final String outfilename_for_dc_for_go_mapping_unique,
+ final String outfilename_for_rank_counts,
+ final String outfilename_for_ancestor_species_counts ) {
try {
final BufferedWriter out_counts = new BufferedWriter( new FileWriter( outfilename_for_counts ) );
final BufferedWriter out_dc = new BufferedWriter( new FileWriter( outfilename_for_dc ) );
final BufferedWriter out_dc_for_go_mapping = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping ) );
+ final BufferedWriter out_dc_for_go_mapping_unique = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping_unique ) );
final SortedMap<String, Integer> dc_gain_counts = new TreeMap<String, Integer>();
for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorPostorder(); it.hasNext(); ) {
final PhylogenyNode n = it.next();
final SortedMap<Integer, Integer> histogram = new TreeMap<Integer, Integer>();
final SortedMap<Integer, StringBuilder> domain_lists = new TreeMap<Integer, StringBuilder>();
final SortedMap<Integer, PriorityQueue<String>> domain_lists_go = new TreeMap<Integer, PriorityQueue<String>>();
+ final SortedMap<Integer, SortedSet<String>> domain_lists_go_unique = new TreeMap<Integer, SortedSet<String>>();
final Set<String> dcs = dc_gain_counts.keySet();
+ final SortedSet<String> more_than_once = new TreeSet<String>();
for( final String dc : dcs ) {
final int count = dc_gain_counts.get( dc );
if ( histogram.containsKey( count ) ) {
histogram.put( count, histogram.get( count ) + 1 );
- domain_lists.put( count, domain_lists.get( count ).append( ", " + dc ) );
- domain_lists_go.get( count ).add( dc );
+ domain_lists.get( count ).append( ", " + dc );
+ domain_lists_go.get( count ).addAll( splitDomainCombination( dc ) );
+ domain_lists_go_unique.get( count ).addAll( splitDomainCombination( dc ) );
}
else {
histogram.put( count, 1 );
domain_lists.put( count, new StringBuilder( dc ) );
final PriorityQueue<String> q = new PriorityQueue<String>();
- q.add( dc );
+ q.addAll( splitDomainCombination( dc ) );
domain_lists_go.put( count, q );
+ final SortedSet<String> set = new TreeSet<String>();
+ set.addAll( splitDomainCombination( dc ) );
+ domain_lists_go_unique.put( count, set );
+ }
+ if ( count > 1 ) {
+ more_than_once.add( dc );
}
}
final Set<Integer> histogram_keys = histogram.keySet();
final StringBuilder dc = domain_lists.get( histogram_key );
out_counts.write( histogram_key + "\t" + count + ForesterUtil.LINE_SEPARATOR );
out_dc.write( histogram_key + "\t" + dc + ForesterUtil.LINE_SEPARATOR );
+ out_dc_for_go_mapping.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR );
+ final Object[] sorted = domain_lists_go.get( histogram_key ).toArray();
+ Arrays.sort( sorted );
+ for( final Object domain : sorted ) {
+ out_dc_for_go_mapping.write( domain + ForesterUtil.LINE_SEPARATOR );
+ }
+ out_dc_for_go_mapping_unique.write( "#" + histogram_key + ForesterUtil.LINE_SEPARATOR );
+ for( final String domain : domain_lists_go_unique.get( histogram_key ) ) {
+ out_dc_for_go_mapping_unique.write( domain + ForesterUtil.LINE_SEPARATOR );
+ }
}
out_counts.close();
out_dc.close();
out_dc_for_go_mapping.close();
+ out_dc_for_go_mapping_unique.close();
+ //
+ final SortedMap<String, Integer> lca_rank_counts = new TreeMap<String, Integer>();
+ final SortedMap<String, Integer> lca_ancestor_species_counts = new TreeMap<String, Integer>();
+ for( final String dc : more_than_once ) {
+ final List<PhylogenyNode> nodes = new ArrayList<PhylogenyNode>();
+ for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorExternalForward(); it.hasNext(); ) {
+ final PhylogenyNode n = it.next();
+ if ( n.getNodeData().getBinaryCharacters().getGainedCharacters().contains( dc ) ) {
+ nodes.add( n );
+ }
+ }
+ for( int i = 0; i < nodes.size() - 1; ++i ) {
+ for( int j = i + 1; j < nodes.size(); ++j ) {
+ final PhylogenyNode lca = PhylogenyMethods.getInstance().obtainLCA( nodes.get( i ),
+ nodes.get( j ) );
+ String rank = "unknown";
+ if ( lca.getNodeData().isHasTaxonomy()
+ && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) {
+ rank = lca.getNodeData().getTaxonomy().getRank();
+ }
+ addToCountMap( lca_rank_counts, rank );
+ String lca_species;
+ if ( lca.getNodeData().isHasTaxonomy()
+ && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getScientificName() ) ) {
+ lca_species = lca.getNodeData().getTaxonomy().getScientificName();
+ }
+ else if ( lca.getNodeData().isHasTaxonomy()
+ && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getCommonName() ) ) {
+ lca_species = lca.getNodeData().getTaxonomy().getCommonName();
+ }
+ else {
+ lca_species = lca.getName();
+ }
+ addToCountMap( lca_ancestor_species_counts, lca_species );
+ }
+ }
+ }
+ final BufferedWriter out_for_rank_counts = new BufferedWriter( new FileWriter( outfilename_for_rank_counts ) );
+ final BufferedWriter out_for_ancestor_species_counts = new BufferedWriter( new FileWriter( outfilename_for_ancestor_species_counts ) );
+ ForesterUtil.map2writer( out_for_rank_counts, lca_rank_counts, "\t", ForesterUtil.LINE_SEPARATOR );
+ ForesterUtil.map2writer( out_for_ancestor_species_counts,
+ lca_ancestor_species_counts,
+ "\t",
+ ForesterUtil.LINE_SEPARATOR );
+ out_for_rank_counts.close();
+ out_for_ancestor_species_counts.close();
}
catch ( final IOException e ) {
ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
ForesterUtil.programMessage( surfacing.PRG_NAME,
"Wrote independent domain combination gains fitch lists to (for GO mapping) ["
+ outfilename_for_dc_for_go_mapping + "]" );
+ ForesterUtil.programMessage( surfacing.PRG_NAME,
+ "Wrote independent domain combination gains fitch lists to (for GO mapping, unique) ["
+ + outfilename_for_dc_for_go_mapping_unique + "]" );
+ }
+
+ private final static void addToCountMap( final Map<String, Integer> map, final String s ) {
+ if ( map.containsKey( s ) ) {
+ map.put( s, map.get( s ) + 1 );
+ }
+ else {
+ map.put( s, 1 );
+ }
}
public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
randomization = "yes, seed = " + random_number_seed_for_fitch_parsimony;
}
else {
- domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( false );
+ domain_parsimony.executeFitchParsimonyOnBinaryDomainCombintion( true );
}
SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(), outfile_name
+ surfacing.PARSIMONY_OUTPUT_GL_SUFFIX_FITCH_BINARY_COMBINATIONS, Format.FORESTER );
calculateIndependentDomainCombinationGains( local_phylogeny_l, outfile_name
+ surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX, outfile_name
+ surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX, outfile_name
- + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX );
+ + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX, outfile_name
+ + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX, outfile_name
+ + "_indep_dc_gains_fitch_lca_ranks.txt", outfile_name + "_indep_dc_gains_fitch_lca_taxonomies.txt" );
}
}
public static void extractProteinNames( final List<Protein> proteins,
final List<DomainId> query_domain_ids_nc_order,
final Writer out,
- final String separator ) throws IOException {
+ final String separator,
+ final String limit_to_species ) throws IOException {
for( final Protein protein : proteins ) {
- if ( protein.contains( query_domain_ids_nc_order, true ) ) {
- out.write( protein.getSpecies().getSpeciesId() );
- out.write( separator );
- out.write( protein.getProteinId().getId() );
- out.write( separator );
- out.write( "[" );
- final Set<DomainId> visited_domain_ids = new HashSet<DomainId>();
- boolean first = true;
- for( final Domain domain : protein.getProteinDomains() ) {
- if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
- visited_domain_ids.add( domain.getDomainId() );
- if ( first ) {
- first = false;
- }
- else {
- out.write( " " );
- }
- out.write( domain.getDomainId().getId() );
- out.write( " {" );
- out.write( "" + domain.getTotalCount() );
- out.write( "}" );
- }
- }
- out.write( "]" );
- out.write( separator );
- if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
- .equals( SurfacingConstants.NONE ) ) ) {
- out.write( protein.getDescription() );
- }
- out.write( separator );
- if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
- .equals( SurfacingConstants.NONE ) ) ) {
- out.write( protein.getAccession() );
- }
- out.write( SurfacingConstants.NL );
- }
- }
- out.flush();
- }
-
- public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species,
- final DomainId domain_id,
- final Writer out,
- final String separator ) throws IOException {
- for( final Species species : protein_lists_per_species.keySet() ) {
- for( final Protein protein : protein_lists_per_species.get( species ) ) {
- final List<Domain> domains = protein.getProteinDomains( domain_id );
- if ( domains.size() > 0 ) {
- final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
- for( final Domain domain : domains ) {
- stats.addValue( domain.getPerSequenceEvalue() );
- }
+ if ( ForesterUtil.isEmpty( limit_to_species )
+ || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
+ if ( protein.contains( query_domain_ids_nc_order, true ) ) {
out.write( protein.getSpecies().getSpeciesId() );
out.write( separator );
out.write( protein.getProteinId().getId() );
out.write( separator );
- out.write( "[" + FORMATTER.format( stats.median() ) + "]" );
+ out.write( "[" );
+ final Set<DomainId> visited_domain_ids = new HashSet<DomainId>();
+ boolean first = true;
+ for( final Domain domain : protein.getProteinDomains() ) {
+ if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
+ visited_domain_ids.add( domain.getDomainId() );
+ if ( first ) {
+ first = false;
+ }
+ else {
+ out.write( " " );
+ }
+ out.write( domain.getDomainId().getId() );
+ out.write( " {" );
+ out.write( "" + domain.getTotalCount() );
+ out.write( "}" );
+ }
+ }
+ out.write( "]" );
out.write( separator );
if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
.equals( SurfacingConstants.NONE ) ) ) {
out.flush();
}
+ /**
+  * Writes one line for every protein that has at least one domain matching
+  * <code>domain_id</code>. The fields of a line, separated by
+  * <code>separator</code>, are: species id, protein id, the median of the
+  * values returned by <code>getPerSequenceEvalue()</code> for the matching
+  * domains (formatted with <code>FORMATTER</code>, in square brackets),
+  * description and accession. Description and accession are left blank when
+  * they are empty or equal to <code>SurfacingConstants.NONE</code>. The
+  * writer is flushed once at the end and is not closed here.
+  *
+  * @param protein_lists_per_species proteins grouped by species, visited in the order of the map
+  * @param domain_id the domain a protein must contain in order to be reported
+  * @param out destination of the report
+  * @param separator text written between the fields of a line
+  * @param limit_to_species when not empty, only proteins whose species id equals this
+  *        value (ignoring case) are reported
+  * @throws IOException if writing to or flushing <code>out</code> fails
+  */
+ public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species,
+                                         final DomainId domain_id,
+                                         final Writer out,
+                                         final String separator,
+                                         final String limit_to_species ) throws IOException {
+     for( final List<Protein> proteins_of_species : protein_lists_per_species.values() ) {
+         for( final Protein candidate : proteins_of_species ) {
+             // Skip proteins of other species when a species restriction is given.
+             if ( !ForesterUtil.isEmpty( limit_to_species )
+                     && !candidate.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
+                 continue;
+             }
+             final List<Domain> matches = candidate.getProteinDomains( domain_id );
+             if ( matches.isEmpty() ) {
+                 continue;
+             }
+             final DescriptiveStatistics evalues = new BasicDescriptiveStatistics();
+             for( final Domain match : matches ) {
+                 evalues.addValue( match.getPerSequenceEvalue() );
+             }
+             out.write( candidate.getSpecies().getSpeciesId() );
+             out.write( separator );
+             out.write( candidate.getProteinId().getId() );
+             out.write( separator );
+             out.write( "[" + FORMATTER.format( evalues.median() ) + "]" );
+             out.write( separator );
+             if ( !ForesterUtil.isEmpty( candidate.getDescription() )
+                     && !candidate.getDescription().equals( SurfacingConstants.NONE ) ) {
+                 out.write( candidate.getDescription() );
+             }
+             out.write( separator );
+             if ( !ForesterUtil.isEmpty( candidate.getAccession() )
+                     && !candidate.getAccession().equals( SurfacingConstants.NONE ) ) {
+                 out.write( candidate.getAccession() );
+             }
+             out.write( SurfacingConstants.NL );
+         }
+     }
+     out.flush();
+ }
+
public static SortedSet<DomainId> getAllDomainIds( final List<GenomeWideCombinableDomains> gwcd_list ) {
final SortedSet<DomainId> all_domains_ids = new TreeSet<DomainId>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {
final PhylogenyNode n = it.next();
if ( ForesterUtil.isEmpty( n.getName() )
&& ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
- .getScientificName() ) ) ) {
+ .getScientificName() ) )
+ && ( !n.getNodeData().isHasTaxonomy() || ForesterUtil.isEmpty( n.getNodeData().getTaxonomy()
+ .getCommonName() ) ) ) {
if ( n.getParent() != null ) {
names.append( " " );
names.append( n.getParent().getName() );
}
+ final List l = n.getAllExternalDescendants();
+ for( final Object object : l ) {
+ System.out.println( l.toString() );
+ }
++c;
}
}
return domains;
}
+ /**
+  * Splits a stringified domain combination of the form <code>A=B</code> at
+  * the <code>=</code> sign. If the split does not produce exactly two parts,
+  * an error message is printed through <code>ForesterUtil.printErrorMessage</code>
+  * and the JVM is terminated with exit status -1.
+  *
+  * @param dc the stringified domain combination
+  * @return a new, modifiable list holding the two parts in their original order
+  */
+ private static List<String> splitDomainCombination( final String dc ) {
+     final String[] halves = dc.split( "=" );
+     if ( halves.length != 2 ) {
+         ForesterUtil.printErrorMessage( surfacing.PRG_NAME,
+                                         "Stringyfied domain combination has illegal format: " + dc );
+         System.exit( -1 );
+     }
+     // Copy into an ArrayList so that the caller receives a resizable list.
+     return new ArrayList<String>( Arrays.asList( halves ) );
+ }
+
public static void writeAllDomainsChangedOnAllSubtrees( final Phylogeny p,
final boolean get_gains,
final String outdir,
public static DescriptiveStatistics writeDomainSimilaritiesToFile( final StringBuilder html_desc,
final StringBuilder html_title,
- final Writer w,
+ final Writer single_writer,
+ Map<Character, Writer> split_writers,
final SortedSet<DomainSimilarity> similarities,
final boolean treat_as_binary,
final List<Species> species_order,
System.out.println( "Pearsonian skewness : n/a" );
}
}
+ if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) {
+ split_writers = new HashMap<Character, Writer>();
+ split_writers.put( '_', single_writer );
+ }
switch ( print_option ) {
case SIMPLE_TAB_DELIMITED:
break;
case HTML:
- w.write( "<html>" );
- w.write( SurfacingConstants.NL );
- addHtmlHead( w, "SURFACING :: " + html_title );
- w.write( SurfacingConstants.NL );
- w.write( "<body>" );
- w.write( SurfacingConstants.NL );
- w.write( html_desc.toString() );
- w.write( SurfacingConstants.NL );
- w.write( "<hr>" );
- w.write( "<br>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tt><pre>" );
- w.write( SurfacingConstants.NL );
- if ( histo != null ) {
- w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+ for( final Character key : split_writers.keySet() ) {
+ final Writer w = split_writers.get( key );
+ w.write( "<html>" );
+ w.write( SurfacingConstants.NL );
+ if ( key != '_' ) {
+ addHtmlHead( w, "DCs (" + html_title + ") " + key.toString().toUpperCase() );
+ }
+ else {
+ addHtmlHead( w, "DCs (" + html_title + ")" );
+ }
+ w.write( SurfacingConstants.NL );
+ w.write( "<body>" );
+ w.write( SurfacingConstants.NL );
+ w.write( html_desc.toString() );
+ w.write( SurfacingConstants.NL );
+ w.write( "<hr>" );
+ w.write( "<br>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tt><pre>" );
+ w.write( SurfacingConstants.NL );
+ if ( histo != null ) {
+ w.write( histo.toStringBuffer( 20, '|', 40, 5 ).toString() );
+ w.write( SurfacingConstants.NL );
+ }
+ w.write( "</pre></tt>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<table>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ if ( stats.getN() > 1 ) {
+ w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
+ }
+ else {
+ w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
+ }
+ w.write( SurfacingConstants.NL );
+ w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
+ w.write( SurfacingConstants.NL );
+ if ( stats.getN() > 1 ) {
+ w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
+ }
+ else {
+ w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
+ }
+ w.write( SurfacingConstants.NL );
+ w.write( "</table>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<br>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<hr>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<br>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "<table>" );
w.write( SurfacingConstants.NL );
}
- w.write( "</pre></tt>" );
- w.write( SurfacingConstants.NL );
- w.write( "<table>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>N: </td><td>" + stats.getN() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Min: </td><td>" + stats.getMin() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Max: </td><td>" + stats.getMax() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Mean: </td><td>" + stats.arithmeticMean() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- if ( stats.getN() > 1 ) {
- w.write( "<tr><td>SD: </td><td>" + stats.sampleStandardDeviation() + "</td></tr>" );
- }
- else {
- w.write( "<tr><td>SD: </td><td>n/a</td></tr>" );
- }
- w.write( SurfacingConstants.NL );
- w.write( "<tr><td>Median: </td><td>" + stats.median() + "</td></tr>" );
- w.write( SurfacingConstants.NL );
- if ( stats.getN() > 1 ) {
- w.write( "<tr><td>Pearsonian skewness: </td><td>" + stats.pearsonianSkewness() + "</td></tr>" );
- }
- else {
- w.write( "<tr><td>Pearsonian skewness: </td><td>n/a</td></tr>" );
- }
- w.write( SurfacingConstants.NL );
- w.write( "</table>" );
- w.write( SurfacingConstants.NL );
- w.write( "<br>" );
- w.write( SurfacingConstants.NL );
- w.write( "<hr>" );
- w.write( SurfacingConstants.NL );
- w.write( "<br>" );
- w.write( SurfacingConstants.NL );
- w.write( "<table>" );
- w.write( SurfacingConstants.NL );
break;
}
- w.write( SurfacingConstants.NL );
+ for( final Writer w : split_writers.values() ) {
+ w.write( SurfacingConstants.NL );
+ }
for( final DomainSimilarity similarity : similarities ) {
if ( ( species_order != null ) && !species_order.isEmpty() ) {
( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order );
}
- w.write( similarity.toStringBuffer( print_option ).toString() );
- w.write( SurfacingConstants.NL );
+ if ( single_writer != null ) {
+ single_writer.write( similarity.toStringBuffer( print_option ).toString() );
+ }
+ else {
+ Writer local_writer = split_writers.get( ( similarity.getDomainId().getId().charAt( 0 ) + "" )
+ .toLowerCase().charAt( 0 ) );
+ if ( local_writer == null ) {
+ local_writer = split_writers.get( '0' );
+ }
+ local_writer.write( similarity.toStringBuffer( print_option ).toString() );
+ }
+ for( final Writer w : split_writers.values() ) {
+ w.write( SurfacingConstants.NL );
+ }
}
switch ( print_option ) {
case HTML:
- w.write( SurfacingConstants.NL );
- w.write( "</table>" );
- w.write( SurfacingConstants.NL );
- w.write( "</font>" );
- w.write( SurfacingConstants.NL );
- w.write( "</body>" );
- w.write( SurfacingConstants.NL );
- w.write( "</html>" );
- w.write( SurfacingConstants.NL );
+ for( final Writer w : split_writers.values() ) {
+ w.write( SurfacingConstants.NL );
+ w.write( "</table>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "</font>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "</body>" );
+ w.write( SurfacingConstants.NL );
+ w.write( "</html>" );
+ w.write( SurfacingConstants.NL );
+ }
break;
}
- w.flush();
- w.close();
+ for( final Writer w : split_writers.values() ) {
+ w.close();
+ }
return stats;
}
w.write( ForesterUtil.LINE_SEPARATOR );
my_matrix.writeNexusTaxaBlock( w );
my_matrix.writeNexusBinaryChractersBlock( w );
- PhylogenyWriter.writeNexusTreesBlock( w, phylogenies );
+ PhylogenyWriter.writeNexusTreesBlock( w, phylogenies, NH_CONVERSION_SUPPORT_VALUE_STYLE.NONE );
w.flush();
w.close();
ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence(),
phylogeny );
}
+
+ public static void domainsPerProteinsStatistics( final String genome,
+ final List<Protein> protein_list,
+ final DescriptiveStatistics all_genomes_domains_per_potein_stats,
+ final SortedMap<Integer, Integer> all_genomes_domains_per_potein_histo,
+ final SortedSet<String> domains_which_are_always_single,
+ final SortedSet<String> domains_which_are_sometimes_single_sometimes_not,
+ final SortedSet<String> domains_which_never_single,
+ final Writer writer ) {
+ final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+ for( final Protein protein : protein_list ) {
+ final int domains = protein.getNumberOfProteinDomains();
+ //System.out.println( domains );
+ stats.addValue( domains );
+ all_genomes_domains_per_potein_stats.addValue( domains );
+ if ( !all_genomes_domains_per_potein_histo.containsKey( domains ) ) {
+ all_genomes_domains_per_potein_histo.put( domains, 1 );
+ }
+ else {
+ all_genomes_domains_per_potein_histo.put( domains,
+ 1 + all_genomes_domains_per_potein_histo.get( domains ) );
+ }
+ if ( domains == 1 ) {
+ final String domain = protein.getProteinDomain( 0 ).getDomainId().getId();
+ if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) {
+ if ( domains_which_never_single.contains( domain ) ) {
+ domains_which_never_single.remove( domain );
+ domains_which_are_sometimes_single_sometimes_not.add( domain );
+ }
+ else {
+ domains_which_are_always_single.add( domain );
+ }
+ }
+ }
+ else if ( domains > 1 ) {
+ for( final Domain d : protein.getProteinDomains() ) {
+ final String domain = d.getDomainId().getId();
+ // System.out.println( domain );
+ if ( !domains_which_are_sometimes_single_sometimes_not.contains( domain ) ) {
+ if ( domains_which_are_always_single.contains( domain ) ) {
+ domains_which_are_always_single.remove( domain );
+ domains_which_are_sometimes_single_sometimes_not.add( domain );
+ }
+ else {
+ domains_which_never_single.add( domain );
+ }
+ }
+ }
+ }
+ }
+ try {
+ writer.write( genome );
+ writer.write( "\t" );
+ if ( stats.getN() >= 1 ) {
+ writer.write( stats.arithmeticMean() + "" );
+ writer.write( "\t" );
+ if ( stats.getN() >= 2 ) {
+ writer.write( stats.sampleStandardDeviation() + "" );
+ }
+ else {
+ writer.write( "" );
+ }
+ writer.write( "\t" );
+ writer.write( stats.median() + "" );
+ writer.write( "\t" );
+ writer.write( stats.getN() + "" );
+ writer.write( "\t" );
+ writer.write( stats.getMin() + "" );
+ writer.write( "\t" );
+ writer.write( stats.getMax() + "" );
+ }
+ else {
+ writer.write( "\t" );
+ writer.write( "\t" );
+ writer.write( "\t" );
+ writer.write( "0" );
+ writer.write( "\t" );
+ writer.write( "\t" );
+ }
+ writer.write( "\n" );
+ }
+ catch ( final IOException e ) {
+ e.printStackTrace();
+ }
+ }
}