X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FSurfacingUtil.java;h=d3e8c0a0c36e31fde063518a0ccca34b13dbe9f1;hb=2ae595537adc997e493ff35f5ab5d2e80709bac2;hp=409a5d13b24602c7fc1c871a3b82422f18648a4e;hpb=94fc8a7809432b86c41a2ae5f3ab4977cd5d5599;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 409a5d1..d3e8c0a 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -166,7 +166,9 @@ public final class SurfacingUtil { final String outfilename_for_counts, final String outfilename_for_dc, final String outfilename_for_dc_for_go_mapping, - final String outfilename_for_dc_for_go_mapping_unique ) { + final String outfilename_for_dc_for_go_mapping_unique, + final String outfilename_for_rank_counts, + final String outfilename_for_ancestor_species_counts ) { try { final BufferedWriter out_counts = new BufferedWriter( new FileWriter( outfilename_for_counts ) ); final BufferedWriter out_dc = new BufferedWriter( new FileWriter( outfilename_for_dc ) ); @@ -190,6 +192,7 @@ public final class SurfacingUtil { final SortedMap> domain_lists_go = new TreeMap>(); final SortedMap> domain_lists_go_unique = new TreeMap>(); final Set dcs = dc_gain_counts.keySet(); + final SortedSet more_than_once = new TreeSet(); for( final String dc : dcs ) { final int count = dc_gain_counts.get( dc ); if ( histogram.containsKey( count ) ) { @@ -208,6 +211,9 @@ public final class SurfacingUtil { set.addAll( splitDomainCombination( dc ) ); domain_lists_go_unique.put( count, set ); } + if ( count > 1 ) { + more_than_once.add( dc ); + } } final Set histogram_keys = histogram.keySet(); for( final Integer histogram_key : histogram_keys ) { @@ -230,6 +236,52 @@ public final class SurfacingUtil { out_dc.close(); out_dc_for_go_mapping.close(); out_dc_for_go_mapping_unique.close(); + // + final SortedMap lca_rank_counts = new TreeMap(); + final SortedMap lca_ancestor_species_counts = new TreeMap(); + for( final String dc : more_than_once ) { + final List nodes = new ArrayList(); + for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorExternalForward(); it.hasNext(); ) { + final PhylogenyNode n = it.next(); + if ( n.getNodeData().getBinaryCharacters().getGainedCharacters().contains( dc ) ) { + nodes.add( n ); + } + } + for( int i = 0; i < nodes.size() - 1; ++i ) { + for( int j = i + 1; j < nodes.size(); ++j ) { + final PhylogenyNode lca = PhylogenyMethods.getInstance().obtainLCA( nodes.get( i ), + nodes.get( j ) ); + String rank = "unknown"; + if ( lca.getNodeData().isHasTaxonomy() + && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) { + rank = lca.getNodeData().getTaxonomy().getRank(); + } + addToCountMap( lca_rank_counts, rank ); + String lca_species; + if ( lca.getNodeData().isHasTaxonomy() + && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getScientificName() ) ) { + lca_species = lca.getNodeData().getTaxonomy().getScientificName(); + } + else if ( lca.getNodeData().isHasTaxonomy() + && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getCommonName() ) ) { + lca_species = lca.getNodeData().getTaxonomy().getCommonName(); + } + else { + lca_species = lca.getName(); + } + addToCountMap( lca_ancestor_species_counts, lca_species ); + } + } + } + final BufferedWriter out_for_rank_counts = new BufferedWriter( new FileWriter( outfilename_for_rank_counts ) ); + final BufferedWriter out_for_ancestor_species_counts = new BufferedWriter( new FileWriter( outfilename_for_ancestor_species_counts ) ); + ForesterUtil.map2writer( out_for_rank_counts, lca_rank_counts, "\t", ForesterUtil.LINE_SEPARATOR ); + ForesterUtil.map2writer( out_for_ancestor_species_counts, + lca_ancestor_species_counts, + "\t", + ForesterUtil.LINE_SEPARATOR ); + out_for_rank_counts.close(); + out_for_ancestor_species_counts.close(); } catch ( final IOException e ) { ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e ); @@ -246,6 +298,15 @@ public final class SurfacingUtil { + outfilename_for_dc_for_go_mapping_unique + "]" ); } + private final static void addToCountMap( final Map map, final String s ) { + if ( map.containsKey( s ) ) { + map.put( s, map.get( s ) + 1 ); + } + else { + map.put( s, 1 ); + } + } + public static int calculateOverlap( final Domain domain, final List covered_positions ) { int overlap_count = 0; for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) { @@ -724,7 +785,8 @@ public final class SurfacingUtil { + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX, outfile_name + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX, outfile_name - + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX ); + + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX, outfile_name + + "_indep_dc_gains_fitch_lca_ranks.txt", outfile_name + "_indep_dc_gains_fitch_lca_taxonomies.txt" ); } }