X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsurfacing%2FSurfacingUtil.java;h=6c036fd6bb52d5a260f4ac0feca9903a6b12b410;hb=dfb728ba91a9112309c510d032464446cecc273e;hp=1774cc68b9a3b02ec8533c1ec880e333c8f803fd;hpb=15f3c39fd477204175c0889f10b8c5e4271acf23;p=jalview.git diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 1774cc6..6c036fd 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -66,11 +66,10 @@ import org.forester.go.GoTerm; import org.forester.go.PfamToGoMapping; import org.forester.io.parsers.nexus.NexusConstants; import org.forester.io.writers.PhylogenyWriter; -import org.forester.msa.MsaCompactor.SORT_BY; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.PhylogenyNodeI.NH_CONVERSION_SUPPORT_VALUE_STYLE; +import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.BinaryCharacters; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; @@ -373,10 +372,9 @@ public final class SurfacingUtil { nodes.add( n ); } } - for( int i = 0; i < nodes.size() - 1; ++i ) { + for( int i = 0; i < ( nodes.size() - 1 ); ++i ) { for( int j = i + 1; j < nodes.size(); ++j ) { - final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ), - nodes.get( j ) ); + final PhylogenyNode lca = PhylogenyMethods.calculateLCA( nodes.get( i ), nodes.get( j ) ); String rank = "unknown"; if ( lca.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) { @@ -1186,8 +1184,11 @@ public final class SurfacingUtil { final DomainId domain_id, final Writer out, final String separator, - final String limit_to_species ) throws IOException { + final String limit_to_species, + final double domain_e_cutoff ) throws IOException { + System.out.println( "Per domain E-value: " + domain_e_cutoff ); for( final Species species : protein_lists_per_species.keySet() ) { + System.out.println( species + ":" ); for( final Protein protein : protein_lists_per_species.get( species ) ) { if ( ForesterUtil.isEmpty( limit_to_species ) || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { @@ -1199,24 +1200,33 @@ public final class SurfacingUtil { out.write( separator ); out.write( domain_id.toString() ); out.write( separator ); + int prev_to = -1; for( final Domain domain : domains ) { - out.write( "/" ); - out.write( domain.getFrom() + "-" + domain.getTo() ); + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { + out.write( "/" ); + out.write( domain.getFrom() + "-" + domain.getTo() ); + if ( prev_to >= 0 ) { + final int l = domain.getFrom() - prev_to; + System.out.println( l ); + } + prev_to = domain.getTo(); + } } out.write( "/" ); out.write( separator ); - - Domain domain_ary[] = new Domain[ protein.getProteinDomains().size() ]; - - for( int i = 0; i < protein.getProteinDomains().size(); ++i ) { - domain_ary[ i ] = protein.getProteinDomains().get( i ); + final List domain_list = new ArrayList(); + for( final Domain domain : protein.getProteinDomains() ) { + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { + domain_list.add( domain ); + } } - - Arrays.sort( domain_ary, new DomainComparator( false ) ); - + final Domain domain_ary[] = new Domain[ domain_list.size() ]; + for( int i = 0; i < domain_list.size(); ++i ) { + domain_ary[ i ] = domain_list.get( i ); + } + Arrays.sort( domain_ary, new DomainComparator( true ) ); out.write( "{" ); boolean first = true; - for( final Domain domain : domain_ary ) { if ( first ) { first = false; @@ -1225,10 +1235,8 @@ public final class SurfacingUtil { out.write( "," ); } out.write( domain.getDomainId().toString() ); - out.write( ":" ); - out.write( domain.getFrom() ); - out.write( "-" ); - out.write( domain.getTo() ); + out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); + out.write( ":" + domain.getPerDomainEvalue() ); } out.write( "}" ); if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription() @@ -1613,58 +1621,50 @@ public final class SurfacingUtil { + all_pfams_encountered.size() ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without a mapping : " + pfams_without_mappings_counter + " [" - + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with a mapping : " + pfams_with_mappings_counter - + " [" - + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) - + "%]" ); + + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping : " + + pfams_with_mappings_counter + " [" + + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping to proc. or func. : " + pfams_with_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to biological process: " + biological_process_counter - + " [" - + ( 100 * biological_process_counter / all_pfams_encountered.size() ) - + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to molecular function: " + molecular_function_counter - + " [" - + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) - + "%]" ); - ForesterUtil.programMessage( surfacing.PRG_NAME, - "Pfams with mapping to cellular component: " + cellular_component_counter - + " [" - + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) - + "%]" ); + + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to biological process: " + + biological_process_counter + " [" + + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to molecular function: " + + molecular_function_counter + " [" + + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); + ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to cellular component: " + + cellular_component_counter + " [" + + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Sum of Pfams encountered : " + all_pfams_encountered.size() ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without a mapping : " + pfams_without_mappings_counter - + " [" + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams without mapping to proc. or func. : " + pfams_without_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping : " + pfams_with_mappings_counter + " [" - + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with a mapping to proc. or func. : " + pfams_with_mappings_to_bp_or_mf_counter + " [" - + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " [" - + ( 100 * biological_process_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " [" - + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " [" - + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) + "%]" ); + + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" ); summary_writer.write( ForesterUtil.LINE_SEPARATOR ); summary_writer.close(); } @@ -2632,7 +2632,6 @@ public final class SurfacingUtil { @Override public final int compare( final Domain d0, final Domain d1 ) { - if ( d0.getFrom() < d1.getFrom() ) { return _ascending ? -1 : 1; } @@ -2640,13 +2639,6 @@ public final class SurfacingUtil { return _ascending ? 1 : -1; } return 0; - } - } } - - - - -