in progress...
[jalview.git] / forester / java / src / org / forester / msa_compactor / MsaCompactor.java
index da74dec..8a13b5c 100644 (file)
@@ -82,7 +82,7 @@ public class MsaCompactor {
     //
     private String                             _maffts_opts               = "--auto";
     private DeleteableMsa                      _msa                       = null;
-    private boolean                            _norm                      = true;
+    private boolean                            _normalize_for_effective_seq_length                      = true;
     private File                               _out_file_base             = null;
     private MSA_FORMAT                         _output_format             = MSA_FORMAT.FASTA;
     private String                             _path_to_mafft             = null;
@@ -130,9 +130,9 @@ public class MsaCompactor {
         return phy;
     }
 
-    public final List<MsaProperties> chart( final int step, final boolean realign, final boolean norm )
+    public final List<MsaProperties> chart( final int step, final boolean realign, final boolean normalize_for_effective_seq_length )
             throws IOException, InterruptedException {
-        final GapContribution stats[] = calcGapContribtionsStats( norm );
+        final GapContribution stats[] = calcGapContribtionsStats( normalize_for_effective_seq_length );
         final List<String> to_remove_ids = new ArrayList<String>();
         final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
         for( final GapContribution gap_gontribution : stats ) {
@@ -179,6 +179,7 @@ public class MsaCompactor {
             }
             ++i;
         }
+        
         if ( _phylogentic_inference ) {
             decorateTree( phy, msa_props, true );
             displayTree( phy );
@@ -230,34 +231,32 @@ public class MsaCompactor {
                 it.next().getNodeData().setNodeVisualData( vis );
             }
         }
-      
-                for( int i = 0; i < msa_props.size(); ++i ) {
-                    final MsaProperties msa_prop = msa_props.get( i );
-                    final String id = msa_prop.getRemovedSeq();
-                    if ( !ForesterUtil.isEmpty( id ) ) {
-                        final PhylogenyNode n = phy.getNode( id );
-                        n.setName( n.getName() + " [" + i + "]" );
-                        if ( !chart_only ) {
-                            final NodeVisualData vis = new NodeVisualData();
-                            vis.setFillType( NodeFill.SOLID );
-                            vis.setShape( NodeShape.RECTANGLE );
-                            vis.setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(), min, max, mean_color, max_color ) );
-                            n.getNodeData().setNodeVisualData( vis );
-                        }
-                        else {
-                            n.getNodeData()
-                                    .getNodeVisualData()
-                                    .setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(),
-                                                                           min,
-                                                                           max,
-                                                                           mean,
-                                                                           min_color,
-                                                                           max_color,
-                                                                           mean_color ) );
-                        }
-                    }
-        
+        for( int i = 0; i < msa_props.size(); ++i ) {
+            final MsaProperties msa_prop = msa_props.get( i );
+            final String id = msa_prop.getRemovedSeq();
+            if ( !ForesterUtil.isEmpty( id ) ) {
+                final PhylogenyNode n = phy.getNode( id );
+                n.setName( n.getName() + " [" + i + "]" );
+                if ( !chart_only ) {
+                    final NodeVisualData vis = new NodeVisualData();
+                    vis.setFillType( NodeFill.SOLID );
+                    vis.setShape( NodeShape.RECTANGLE );
+                    vis.setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(), min, max, mean_color, max_color ) );
+                    n.getNodeData().setNodeVisualData( vis );
+                }
+                else {
+                    n.getNodeData()
+                    .getNodeVisualData()
+                    .setNodeColor( ForesterUtil.calcColor( msa_prop.getLength(),
+                                                           min,
+                                                           max,
+                                                           mean,
+                                                           min_color,
+                                                           max_color,
+                                                           mean_color ) );
                 }
+            }
+        }
     }
 
     final public void deleteGapColumns( final double max_allowed_gap_ratio ) {
@@ -274,6 +273,7 @@ public class MsaCompactor {
         config.setDisplaySequenceNames( false );
         config.setDisplaySequenceSymbols( false );
         config.setDisplayGeneNames( false );
+        config.setDisplayMultipleSequenceAlignment( true );
         config.setShowScale( true );
         config.setAddTaxonomyImagesCB( false );
         config.setBaseFontSize( 9 );
@@ -305,7 +305,7 @@ public class MsaCompactor {
 
     public final List<MsaProperties> removeViaGapAverage( final double mean_gapiness ) throws IOException,
     InterruptedException {
-        final GapContribution stats[] = calcGapContribtionsStats( _norm );
+        final GapContribution stats[] = calcGapContribtionsStats( _normalize_for_effective_seq_length );
         final List<String> to_remove_ids = new ArrayList<String>();
         final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
         for( final GapContribution gap_gontribution : stats ) {
@@ -349,14 +349,21 @@ public class MsaCompactor {
             System.out.println( msg );
         }
         if ( _phylogentic_inference ) {
-            decorateTree( phy,  msa_props, false );
+            decorateTree( phy, msa_props, false );
             displayTree( phy );
-        }
+            System.out.println( "calculating phylogentic tree..." );
+            System.out.println();
+            final Phylogeny phy2 = calcTree();
+            addSeqs2Tree( _msa, phy2 );
+            displayTree( phy2 );
+        }   
+      
+       
         return msa_props;
     }
 
     public List<MsaProperties> removeViaLength( final int length ) throws IOException, InterruptedException {
-        final GapContribution stats[] = calcGapContribtionsStats( _norm );
+        final GapContribution stats[] = calcGapContribtionsStats( _normalize_for_effective_seq_length );
         final List<String> to_remove_ids = new ArrayList<String>();
         final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
         for( final GapContribution gap_gontribution : stats ) {
@@ -400,15 +407,21 @@ public class MsaCompactor {
             System.out.println( msg );
         }
         if ( _phylogentic_inference ) {
-            decorateTree( phy,  msa_props, false );
+            decorateTree( phy, msa_props, false );
             displayTree( phy );
-        }
+            System.out.println( "calculating phylogentic tree..." );
+            System.out.println();
+            final Phylogeny phy2 = calcTree();
+            addSeqs2Tree( _msa, phy2 );
+            displayTree( phy2 );
+        }   
+       
         return msa_props;
     }
 
     public final List<MsaProperties> removeWorstOffenders( final int to_remove ) throws IOException,
     InterruptedException {
-        final GapContribution stats[] = calcGapContribtionsStats( _norm );
+        final GapContribution stats[] = calcGapContribtionsStats( _normalize_for_effective_seq_length );
         final List<String> to_remove_ids = new ArrayList<String>();
         final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
         for( int j = 0; j < to_remove; ++j ) {
@@ -450,17 +463,15 @@ public class MsaCompactor {
             System.out.println( msg );
         }
         if ( _phylogentic_inference ) {
-            decorateTree( phy,  msa_props, false );
+            decorateTree( phy, msa_props, false );
             displayTree( phy );
-            
-           
-                System.out.println( "calculating phylogentic tree..." );
-                System.out.println();
-                Phylogeny phy2 = calcTree();
-                addSeqs2Tree( _msa, phy2 );
-                displayTree( phy2 );
-            
-        }
+            System.out.println( "calculating phylogentic tree..." );
+            System.out.println();
+            final Phylogeny phy2 = calcTree();
+            addSeqs2Tree( _msa, phy2 );
+            displayTree( phy2 );
+        }   
+       
         return msa_props;
     }
 
@@ -476,8 +487,8 @@ public class MsaCompactor {
         _maffts_opts = maffts_opts;
     }
 
-    public final void setNorm( final boolean norm ) {
-        _norm = norm;
+    public final void setNorm( final boolean normalize_for_effective_seq_length ) {
+        _normalize_for_effective_seq_length = normalize_for_effective_seq_length;
     }
 
     final public void setOutFileBase( final File out_file_base ) {
@@ -573,8 +584,8 @@ public class MsaCompactor {
         return stats;
     }
 
-    final private GapContribution[] calcGapContribtionsStats( final boolean norm ) {
-        final GapContribution stats[] = calcGapContribtions( norm );
+    final private GapContribution[] calcGapContribtionsStats( final boolean normalize_for_effective_seq_length ) {
+        final GapContribution stats[] = calcGapContribtions( normalize_for_effective_seq_length );
         Arrays.sort( stats );
         return stats;
     }
@@ -641,6 +652,8 @@ public class MsaCompactor {
         sb.append( msa_properties.getLength() );
         sb.append( "\t" );
         sb.append( NF_4.format( msa_properties.getGapRatio() ) );
+        sb.append( "\t" );
+        sb.append( NF_1.format( msa_properties.getAvgNumberOfGaps() ) );
         if ( _calculate_shannon_entropy ) {
             sb.append( "\t" );
             sb.append( NF_4.format( msa_properties.getEntropy7() ) );
@@ -708,6 +721,8 @@ public class MsaCompactor {
         System.out.print( "\t" );
         System.out.print( "Length" );
         System.out.print( "\t" );
+        System.out.print( "Gap R" );
+        System.out.print( "\t" );
         System.out.print( "Gaps" );
         System.out.print( "\t" );
         if ( _calculate_shannon_entropy ) {