X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FSurfacingUtil.java;h=abcd31f67e309e1a5547e289d0bb1d6e00c33cdf;hb=3491e0d3c56167d6f6e8f291572e40367efd7fca;hp=b7590aa8f4da61b64ad9c3103347e8228e0321c9;hpb=841c7135a24076a444b2f6652cde00ef0cf60239;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index b7590aa..abcd31f 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -22,7 +22,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; @@ -39,6 +39,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -69,7 +70,7 @@ import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.PhylogenyNodeI.NH_CONVERSION_SUPPORT_VALUE_STYLE; +import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; @@ -120,6 +121,75 @@ public final class SurfacingUtil { // Hidden constructor. } + public static void performDomainArchitectureAnalysis( final SortedMap> domain_architecutures, + final SortedMap domain_architecuture_counts, + final int min_count, + final File da_counts_outfile, + final File unique_da_outfile ) { + checkForOutputFileWriteability( da_counts_outfile ); + checkForOutputFileWriteability( unique_da_outfile ); + try { + final BufferedWriter da_counts_out = new BufferedWriter( new FileWriter( da_counts_outfile ) ); + final BufferedWriter unique_da_out = new BufferedWriter( new FileWriter( unique_da_outfile ) ); + final Iterator> it = domain_architecuture_counts.entrySet().iterator(); + while ( it.hasNext() ) { + final Map.Entry e = it.next(); + final String da = e.getKey(); + final int count = e.getValue(); + if ( count >= min_count ) { + da_counts_out.write( da ); + da_counts_out.write( "\t" ); + da_counts_out.write( String.valueOf( count ) ); + da_counts_out.write( ForesterUtil.LINE_SEPARATOR ); + } + if ( count == 1 ) { + final Iterator>> it2 = domain_architecutures.entrySet().iterator(); + while ( it2.hasNext() ) { + final Map.Entry> e2 = it2.next(); + final String genome = e2.getKey(); + final Set das = e2.getValue(); + if ( das.contains( da ) ) { + unique_da_out.write( genome ); + unique_da_out.write( "\t" ); + unique_da_out.write( da ); + unique_da_out.write( ForesterUtil.LINE_SEPARATOR ); + } + } + } + } + unique_da_out.close(); + da_counts_out.close(); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() ); + } + ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + da_counts_outfile + "\"" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote distance matrices to \"" + unique_da_outfile + "\"" ); + // + } + + public static int storeDomainArchitectures( final String genome, + final SortedMap> domain_architecutures, + final List protein_list, + final Map distinct_domain_architecuture_counts ) { + final Set da = new HashSet(); + domain_architecutures.put( genome, da ); + for( final Protein protein : protein_list ) { + final String da_str = ( ( BasicProtein ) protein ).toDomainArchitectureString( "~", 3, "=" ); + if ( !da.contains( da_str ) ) { + if ( !distinct_domain_architecuture_counts.containsKey( da_str ) ) { + distinct_domain_architecuture_counts.put( da_str, 1 ); + } + else { + distinct_domain_architecuture_counts.put( da_str, + distinct_domain_architecuture_counts.get( da_str ) + 1 ); + } + da.add( da_str ); + } + } + return da.size(); + } + public static void addAllBinaryDomainCombinationToSet( final GenomeWideCombinableDomains genome, final SortedSet binary_domain_combinations ) { final SortedMap all_cd = genome.getAllCombinableDomainsIds(); @@ -372,7 +442,7 @@ public final class SurfacingUtil { nodes.add( n ); } } - for( int i = 0; i < nodes.size() - 1; ++i ) { + for( int i = 0; i < ( nodes.size() - 1 ); ++i ) { for( int j = i + 1; j < nodes.size(); ++j ) { final PhylogenyNode lca = PhylogenyMethods.calculateLCA( nodes.get( i ), nodes.get( j ) ); String rank = "unknown"; @@ -627,7 +697,7 @@ public final class SurfacingUtil { public static Map> createDomainIdToSecondaryFeaturesMap( final File secondary_features_map_file ) throws IOException { - final BasicTable primary_table = BasicTableParser.parse( secondary_features_map_file, "\t" ); + final BasicTable primary_table = BasicTableParser.parse( secondary_features_map_file, '\t' ); final Map> map = new TreeMap>(); for( int r = 0; r < primary_table.getNumberOfRows(); ++r ) { final DomainId domain_id = new DomainId( primary_table.getValue( 0, r ) ); @@ -1621,58 +1691,50 @@ public final class SurfacingUtil { + all_pfams_encountered.size() ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without a mapping : " + pfams_without_mappings_counter + " [" - + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with a mapping : " + pfams_with_mappings_counter - + " [" - + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) - + "%]" ); + + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping : " + + pfams_with_mappings_counter + " [" + + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping to proc. or func. : " + pfams_with_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to biological process: " + biological_process_counter - + " [" - + ( 100 * biological_process_counter / all_pfams_encountered.size() ) - + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to molecular function: " + molecular_function_counter - + " [" - + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) - + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to cellular component: " + cellular_component_counter - + " [" - + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) - + "%]" ); + + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to biological process: " + + biological_process_counter + " [" + + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to molecular function: " + + molecular_function_counter + " [" + + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to cellular component: " + + cellular_component_counter + " [" + + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Sum of Pfams encountered : " + all_pfams_encountered.size() ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without a mapping : " + pfams_without_mappings_counter - + " [" + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping : " + pfams_with_mappings_counter + " [" - + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping to proc. or func. : " + pfams_with_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " [" - + ( 100 * biological_process_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " [" - + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " [" - + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.close(); }