X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FSurfacingUtil.java;h=5067bc20b42333ac0206279c43c2d68baab253e9;hb=789f3450d14e1f922072f4288833afab71993667;hp=1cc2c7a0c4da0718edc106d48c4219ba93dbb410;hpb=cec76926e7d634373e238e61b805c723ef4c0ca7;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 1cc2c7a..5067bc2 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -22,7 +22,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.surfacing; @@ -66,11 +66,10 @@ import org.forester.go.GoTerm; import org.forester.go.PfamToGoMapping; import org.forester.io.parsers.nexus.NexusConstants; import org.forester.io.writers.PhylogenyWriter; -import org.forester.msa.MsaCompactor.SORT_BY; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.PhylogenyNodeI.NH_CONVERSION_SUPPORT_VALUE_STYLE; +import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; @@ -373,10 +372,9 @@ public final class SurfacingUtil { nodes.add( n ); } } - for( int i = 0; i < nodes.size() - 1; ++i ) { + for( int i = 0; i < ( nodes.size() - 1 ); ++i ) { for( int j = i + 1; j < nodes.size(); ++j ) { - final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ), - nodes.get( j ) ); + final PhylogenyNode lca = PhylogenyMethods.calculateLCA( nodes.get( i ), nodes.get( j ) ); String rank = "unknown"; if ( lca.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) { @@ -629,7 +627,7 @@ public final class SurfacingUtil { public static Map> createDomainIdToSecondaryFeaturesMap( final File secondary_features_map_file ) throws IOException { - final BasicTable primary_table = BasicTableParser.parse( secondary_features_map_file, "\t" ); + final BasicTable primary_table = BasicTableParser.parse( secondary_features_map_file, '\t' ); final Map> map = new TreeMap>(); for( int r = 0; r < primary_table.getNumberOfRows(); ++r ) { final DomainId domain_id = new DomainId( primary_table.getValue( 0, r ) ); @@ -1190,6 +1188,7 @@ public final class SurfacingUtil { final double domain_e_cutoff ) throws IOException { System.out.println( "Per domain E-value: " + domain_e_cutoff ); for( final Species species : protein_lists_per_species.keySet() ) { + System.out.println( species + ":" ); for( final Protein protein : protein_lists_per_species.get( species ) ) { if ( ForesterUtil.isEmpty( limit_to_species ) || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { @@ -1201,34 +1200,33 @@ public final class SurfacingUtil { out.write( separator ); out.write( domain_id.toString() ); out.write( separator ); + int prev_to = -1; for( final Domain domain : domains ) { - if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) { - out.write( "/" ); - out.write( domain.getFrom() + "-" + domain.getTo() ); + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { + out.write( "/" ); + out.write( domain.getFrom() + "-" + domain.getTo() ); + if ( prev_to >= 0 ) { + final int l = domain.getFrom() - prev_to; + System.out.println( l ); + } + prev_to = domain.getTo(); } } out.write( "/" ); out.write( separator ); - final List domain_list = new ArrayList(); - for( final Domain domain : protein.getProteinDomains() ) { - if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) { + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { domain_list.add( domain ); } } - - Domain domain_ary[] = new Domain[ domain_list.size() ]; - + final Domain domain_ary[] = new Domain[ domain_list.size() ]; for( int i = 0; i < domain_list.size(); ++i ) { domain_ary[ i ] = domain_list.get( i ); } - Arrays.sort( domain_ary, new DomainComparator( true ) ); - out.write( "{" ); boolean first = true; - for( final Domain domain : domain_ary ) { if ( first ) { first = false; @@ -1237,7 +1235,7 @@ public final class SurfacingUtil { out.write( "," ); } out.write( domain.getDomainId().toString() ); - out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); + out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); out.write( ":" + domain.getPerDomainEvalue() ); } out.write( "}" ); @@ -1623,58 +1621,50 @@ public final class SurfacingUtil { + all_pfams_encountered.size() ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without a mapping : " + pfams_without_mappings_counter + " [" - + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with a mapping : " + pfams_with_mappings_counter - + " [" - + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) - + "%]" ); + + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping : " + + pfams_with_mappings_counter + " [" + + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping to proc. or func. : " + pfams_with_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to biological process: " + biological_process_counter - + " [" - + ( 100 * biological_process_counter / all_pfams_encountered.size() ) - + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to molecular function: " + molecular_function_counter - + " [" - + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) - + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to cellular component: " + cellular_component_counter - + " [" - + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) - + "%]" ); + + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to biological process: " + + biological_process_counter + " [" + + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to molecular function: " + + molecular_function_counter + " [" + + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to cellular component: " + + cellular_component_counter + " [" + + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Sum of Pfams encountered : " + all_pfams_encountered.size() ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without a mapping : " + pfams_without_mappings_counter - + " [" + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping : " + pfams_with_mappings_counter + " [" - + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping to proc. or func. : " + pfams_with_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " [" - + ( 100 * biological_process_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " [" - + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " [" - + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.close(); } @@ -2642,7 +2632,6 @@ public final class SurfacingUtil { @Override public final int compare( final Domain d0, final Domain d1 ) { - if ( d0.getFrom() < d1.getFrom() ) { return _ascending ? -1 : 1; } @@ -2650,13 +2639,6 @@ public final class SurfacingUtil { return _ascending ? 1 : -1; } return 0; - } - } } - - - - -