X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FSurfacingUtil.java;h=6b3639f43985a5f7b45deec3aaea1460c4672821;hb=03e51d179caedf757b09e2872f9500318bd85a53;hp=4fa94f64c3724dac65b41edb214a6f888d1edcf8;hpb=13fe062e973e7b025c08910326cf9b3206ee8cdb;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 4fa94f6..6b3639f 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -66,7 +66,6 @@ import org.forester.go.GoTerm; import org.forester.go.PfamToGoMapping; import org.forester.io.parsers.nexus.NexusConstants; import org.forester.io.writers.PhylogenyWriter; -import org.forester.msa.MsaCompactor.SORT_BY; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; @@ -373,10 +372,9 @@ public final class SurfacingUtil { nodes.add( n ); } } - for( int i = 0; i < nodes.size() - 1; ++i ) { + for( int i = 0; i < ( nodes.size() - 1 ); ++i ) { for( int j = i + 1; j < nodes.size(); ++j ) { - final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ), - nodes.get( j ) ); + final PhylogenyNode lca = PhylogenyMethods.calculateLCA( nodes.get( i ), nodes.get( j ) ); String rank = "unknown"; if ( lca.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) { @@ -1188,7 +1186,9 @@ public final class SurfacingUtil { final String separator, final String limit_to_species, final double domain_e_cutoff ) throws IOException { + System.out.println( "Per domain E-value: " + domain_e_cutoff ); for( final Species species : protein_lists_per_species.keySet() ) { + System.out.println( species + ":" ); for( final Protein protein : protein_lists_per_species.get( species ) ) { if ( ForesterUtil.isEmpty( limit_to_species ) || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { @@ -1200,34 +1200,33 @@ public final class SurfacingUtil { out.write( separator ); out.write( domain_id.toString() ); out.write( separator ); + int prev_to = -1; for( final Domain domain : domains ) { - if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) { - out.write( "/" ); - out.write( domain.getFrom() + "-" + domain.getTo() ); + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { + out.write( "/" ); + out.write( domain.getFrom() + "-" + domain.getTo() ); + if ( prev_to >= 0 ) { + final int l = domain.getFrom() - prev_to; + System.out.println( l ); + } + prev_to = domain.getTo(); } } out.write( "/" ); out.write( separator ); - final List domain_list = new ArrayList(); - for( final Domain domain : protein.getProteinDomains() ) { - if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) { + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { domain_list.add( domain ); } } - - Domain domain_ary[] = new Domain[ domain_list.size() ]; - + final Domain domain_ary[] = new Domain[ domain_list.size() ]; for( int i = 0; i < domain_list.size(); ++i ) { domain_ary[ i ] = domain_list.get( i ); } - Arrays.sort( domain_ary, new DomainComparator( true ) ); - out.write( "{" ); boolean first = true; - for( final Domain domain : domain_ary ) { if ( first ) { first = false; @@ -1236,7 +1235,7 @@ public final class SurfacingUtil { out.write( "," ); } out.write( domain.getDomainId().toString() ); - out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); + out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); out.write( ":" + domain.getPerDomainEvalue() ); } out.write( "}" ); @@ -1622,58 +1621,50 @@ public final class SurfacingUtil { + all_pfams_encountered.size() ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without a mapping : " + pfams_without_mappings_counter + " [" - + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with a mapping : " + pfams_with_mappings_counter - + " [" - + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) - + "%]" ); + + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping : " + + pfams_with_mappings_counter + " [" + + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping to proc. or func. : " + pfams_with_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to biological process: " + biological_process_counter - + " [" - + ( 100 * biological_process_counter / all_pfams_encountered.size() ) - + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to molecular function: " + molecular_function_counter - + " [" - + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) - + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to cellular component: " + cellular_component_counter - + " [" - + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) - + "%]" ); + + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to biological process: " + + biological_process_counter + " [" + + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to molecular function: " + + molecular_function_counter + " [" + + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to cellular component: " + + cellular_component_counter + " [" + + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Sum of Pfams encountered : " + all_pfams_encountered.size() ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without a mapping : " + pfams_without_mappings_counter - + " [" + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping : " + pfams_with_mappings_counter + " [" - + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping to proc. or func. : " + pfams_with_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " [" - + ( 100 * biological_process_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " [" - + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " [" - + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.close(); } @@ -2641,7 +2632,6 @@ public final class SurfacingUtil { @Override public final int compare( final Domain d0, final Domain d1 ) { - if ( d0.getFrom() < d1.getFrom() ) { return _ascending ? -1 : 1; } @@ -2649,13 +2639,6 @@ public final class SurfacingUtil { return _ascending ? 1 : -1; } return 0; - } - } } - - - - -