inprogress
[jalview.git] / forester / java / src / org / forester / surfacing / SurfacingUtil.java
index 4fa94f6..5067bc2 100644 (file)
@@ -22,7 +22,7 @@
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
 // Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.surfacing;
 
@@ -66,11 +66,10 @@ import org.forester.go.GoTerm;
 import org.forester.go.PfamToGoMapping;
 import org.forester.io.parsers.nexus.NexusConstants;
 import org.forester.io.writers.PhylogenyWriter;
-import org.forester.msa.MsaCompactor.SORT_BY;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.PhylogenyNodeI.NH_CONVERSION_SUPPORT_VALUE_STYLE;
+import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE;
 import org.forester.phylogeny.data.BinaryCharacters;
 import org.forester.phylogeny.data.Confidence;
 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
@@ -373,10 +372,9 @@ public final class SurfacingUtil {
                         nodes.add( n );
                     }
                 }
-                for( int i = 0; i < nodes.size() - 1; ++i ) {
+                for( int i = 0; i < ( nodes.size() - 1 ); ++i ) {
                     for( int j = i + 1; j < nodes.size(); ++j ) {
-                        final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ),
-                                                                                            nodes.get( j ) );
+                        final PhylogenyNode lca = PhylogenyMethods.calculateLCA( nodes.get( i ), nodes.get( j ) );
                         String rank = "unknown";
                         if ( lca.getNodeData().isHasTaxonomy()
                                 && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) {
@@ -629,7 +627,7 @@ public final class SurfacingUtil {
 
     public static Map<DomainId, Set<String>> createDomainIdToSecondaryFeaturesMap( final File secondary_features_map_file )
             throws IOException {
-        final BasicTable<String> primary_table = BasicTableParser.parse( secondary_features_map_file, "\t" );
+        final BasicTable<String> primary_table = BasicTableParser.parse( secondary_features_map_file, '\t' );
         final Map<DomainId, Set<String>> map = new TreeMap<DomainId, Set<String>>();
         for( int r = 0; r < primary_table.getNumberOfRows(); ++r ) {
             final DomainId domain_id = new DomainId( primary_table.getValue( 0, r ) );
@@ -1188,7 +1186,9 @@ public final class SurfacingUtil {
                                             final String separator,
                                             final String limit_to_species,
                                             final double domain_e_cutoff ) throws IOException {
+        System.out.println( "Per domain E-value: " + domain_e_cutoff );
         for( final Species species : protein_lists_per_species.keySet() ) {
+            System.out.println( species + ":" );
             for( final Protein protein : protein_lists_per_species.get( species ) ) {
                 if ( ForesterUtil.isEmpty( limit_to_species )
                         || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
@@ -1200,34 +1200,33 @@ public final class SurfacingUtil {
                         out.write( separator );
                         out.write( domain_id.toString() );
                         out.write( separator );
+                        int prev_to = -1;
                         for( final Domain domain : domains ) {
-                            if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) {
-                                 out.write( "/" );
-                                 out.write( domain.getFrom() + "-" + domain.getTo() );
+                            if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) {
+                                out.write( "/" );
+                                out.write( domain.getFrom() + "-" + domain.getTo() );
+                                if ( prev_to >= 0 ) {
+                                    final int l = domain.getFrom() - prev_to;
+                                    System.out.println( l );
+                                }
+                                prev_to = domain.getTo();
                             }
                         }
                         out.write( "/" );
                         out.write( separator );
-                        
                         final List<Domain> domain_list = new ArrayList<Domain>();
-                        
                         for( final Domain domain : protein.getProteinDomains() ) {
-                            if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) {
+                            if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) {
                                 domain_list.add( domain );
                             }
                         }
-                        
-                        Domain domain_ary[] = new Domain[ domain_list.size() ];
-                        
+                        final Domain domain_ary[] = new Domain[ domain_list.size() ];
                         for( int i = 0; i < domain_list.size(); ++i ) {
                             domain_ary[ i ] = domain_list.get( i );
                         }
-                        
                         Arrays.sort( domain_ary, new DomainComparator( true ) );
-                       
                         out.write( "{" );
                         boolean first = true;
-                        
                         for( final Domain domain : domain_ary ) {
                             if ( first ) {
                                 first = false;
@@ -1236,7 +1235,7 @@ public final class SurfacingUtil {
                                 out.write( "," );
                             }
                             out.write( domain.getDomainId().toString() );
-                            out.write( ":" + domain.getFrom() +  "-" + domain.getTo() );
+                            out.write( ":" + domain.getFrom() + "-" + domain.getTo() );
                             out.write( ":" + domain.getPerDomainEvalue() );
                         }
                         out.write( "}" );
@@ -1622,58 +1621,50 @@ public final class SurfacingUtil {
                     + all_pfams_encountered.size() );
             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without a mapping                 : "
                     + pfams_without_mappings_counter + " ["
-                    + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" );
+                    + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams without mapping to proc. or func. : "
                     + pfams_without_mappings_to_bp_or_mf_counter + " ["
-                    + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
-            ForesterUtil.programMessage( surfacing.PRG_NAME,
-                                         "Pfams with a mapping                    : " + pfams_with_mappings_counter
-                                                 + " ["
-                                                 + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() )
-                                                 + "%]" );
+                    + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping                    : "
+                    + pfams_with_mappings_counter + " ["
+                    + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
             ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with a mapping to proc. or func.  : "
                     + pfams_with_mappings_to_bp_or_mf_counter + " ["
-                    + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
-            ForesterUtil.programMessage( surfacing.PRG_NAME,
-                                         "Pfams with mapping to biological process: " + biological_process_counter
-                                                 + " ["
-                                                 + ( 100 * biological_process_counter / all_pfams_encountered.size() )
-                                                 + "%]" );
-            ForesterUtil.programMessage( surfacing.PRG_NAME,
-                                         "Pfams with mapping to molecular function: " + molecular_function_counter
-                                                 + " ["
-                                                 + ( 100 * molecular_function_counter / all_pfams_encountered.size() )
-                                                 + "%]" );
-            ForesterUtil.programMessage( surfacing.PRG_NAME,
-                                         "Pfams with mapping to cellular component: " + cellular_component_counter
-                                                 + " ["
-                                                 + ( 100 * cellular_component_counter / all_pfams_encountered.size() )
-                                                 + "%]" );
+                    + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to biological process: "
+                    + biological_process_counter + " ["
+                    + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to molecular function: "
+                    + molecular_function_counter + " ["
+                    + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" );
+            ForesterUtil.programMessage( surfacing.PRG_NAME, "Pfams with mapping to cellular component: "
+                    + cellular_component_counter + " ["
+                    + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.write( "# Sum of Pfams encountered                : " + all_pfams_encountered.size() );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.write( "# Pfams without a mapping                 : " + pfams_without_mappings_counter
-                    + " [" + ( 100 * pfams_without_mappings_counter / all_pfams_encountered.size() ) + "%]" );
+                    + " [" + ( ( 100 * pfams_without_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.write( "# Pfams without mapping to proc. or func. : "
                     + pfams_without_mappings_to_bp_or_mf_counter + " ["
-                    + ( 100 * pfams_without_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
+                    + ( ( 100 * pfams_without_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.write( "# Pfams with a mapping                    : " + pfams_with_mappings_counter + " ["
-                    + ( 100 * pfams_with_mappings_counter / all_pfams_encountered.size() ) + "%]" );
+                    + ( ( 100 * pfams_with_mappings_counter ) / all_pfams_encountered.size() ) + "%]" );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.write( "# Pfams with a mapping to proc. or func.  : "
                     + pfams_with_mappings_to_bp_or_mf_counter + " ["
-                    + ( 100 * pfams_with_mappings_to_bp_or_mf_counter / all_pfams_encountered.size() ) + "%]" );
+                    + ( ( 100 * pfams_with_mappings_to_bp_or_mf_counter ) / all_pfams_encountered.size() ) + "%]" );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.write( "# Pfams with mapping to biological process: " + biological_process_counter + " ["
-                    + ( 100 * biological_process_counter / all_pfams_encountered.size() ) + "%]" );
+                    + ( ( 100 * biological_process_counter ) / all_pfams_encountered.size() ) + "%]" );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.write( "# Pfams with mapping to molecular function: " + molecular_function_counter + " ["
-                    + ( 100 * molecular_function_counter / all_pfams_encountered.size() ) + "%]" );
+                    + ( ( 100 * molecular_function_counter ) / all_pfams_encountered.size() ) + "%]" );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.write( "# Pfams with mapping to cellular component: " + cellular_component_counter + " ["
-                    + ( 100 * cellular_component_counter / all_pfams_encountered.size() ) + "%]" );
+                    + ( ( 100 * cellular_component_counter ) / all_pfams_encountered.size() ) + "%]" );
             summary_writer.write( ForesterUtil.LINE_SEPARATOR );
             summary_writer.close();
         }
@@ -2641,7 +2632,6 @@ public final class SurfacingUtil {
 
         @Override
         public final int compare( final Domain d0, final Domain d1 ) {
-
             if ( d0.getFrom() < d1.getFrom() ) {
                 return _ascending ? -1 : 1;
             }
@@ -2649,13 +2639,6 @@ public final class SurfacingUtil {
                 return _ascending ? 1 : -1;
             }
             return 0;
-
         }
-
     }
 }
-
-
-
-
-