From 8aadbec1b0627ebd71e57e60e06621d4038bb79a Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 15 Apr 2015 02:00:22 +0000 Subject: [PATCH] in progress --- .../org/forester/application/msa_compactor.java | 18 ++++++++++++------ .../org/forester/msa_compactor/MsaCompactor.java | 20 ++++++++++---------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index e60d886..e555593 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -105,7 +105,7 @@ public class msa_compactor { int length = -1; int step = 1; boolean realign = false; - boolean norm = true; + boolean normalize_for_effective_seq_length = true; String path_to_mafft = null; int step_for_diagnostics = 1; int min_length = -1; @@ -223,7 +223,7 @@ public class msa_compactor { path_to_mafft = cla.getOptionValueAsCleanString( PATH_TO_MAFFT_OPTION ); } if ( cla.isOptionSet( DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION ) ) { - norm = false; + normalize_for_effective_seq_length = false; } if ( cla.isOptionSet( STEP_FOR_DIAGNOSTICS_OPTION ) ) { step_for_diagnostics = cla.getOptionValueAsInt( STEP_FOR_DIAGNOSTICS_OPTION ); @@ -346,8 +346,11 @@ public class msa_compactor { } System.out.println( "Step for diagnostics reports : " + step_for_diagnostics ); System.out.println( "Calculate normalized Shannon Entropy : " + report_entropy ); - if ( !norm ) { - System.out.println( "Normalize : " + norm ); + if ( normalize_for_effective_seq_length ) { + System.out.println( "Normalize : with individual, effective sequence lenghts" ); + } + else { + System.out.println( "Normalize : with MSA length" ); } System.out.println( "Realign with MAFFT : " + realign ); if ( realign ) { @@ -372,7 +375,7 @@ public class msa_compactor { if ( removed_seqs_out_base != null ) { mc.setRemovedSeqsOutBase( removed_seqs_out_base ); } - mc.setNorm( norm ); + mc.setNorm( normalize_for_effective_seq_length ); mc.setRealign( realign ); if ( realign ) { mc.setPathToMafft( path_to_mafft ); @@ -391,7 +394,7 @@ public class msa_compactor { msa_props = mc.removeViaLength( length ); } else { - msa_props = mc.chart( step, realign, norm ); + msa_props = mc.chart( step, realign, normalize_for_effective_seq_length ); } Chart.display( msa_props, initial_number_of_seqs, report_entropy, in.getName() ); System.out.println(); @@ -500,6 +503,9 @@ public class msa_compactor { + "= maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" ); System.out.println( " -" + PERFORM_PHYLOGENETIC_INFERENCE + " to calculate a simple phylogenetic tree (Kimura distances, NJ)" ); + System.out.println( " -" + DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION + + " to normalize gap-contributions with MSA length, instead of individual effective sequence lenghts" ); + System.out.println(); System.out.println(); System.out.println(); diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index b4d2fe7..8a13b5c 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -82,7 +82,7 @@ public class MsaCompactor { // private String _maffts_opts = "--auto"; private DeleteableMsa _msa = null; - private boolean _norm = true; + private boolean _normalize_for_effective_seq_length = true; private File _out_file_base = null; private MSA_FORMAT _output_format = MSA_FORMAT.FASTA; private String _path_to_mafft = null; @@ -130,9 +130,9 @@ public class MsaCompactor { return phy; } - public final List chart( final int step, final boolean realign, final boolean norm ) + public final List chart( final int step, final boolean realign, final boolean normalize_for_effective_seq_length ) throws IOException, InterruptedException { - final GapContribution stats[] = calcGapContribtionsStats( norm ); + final GapContribution stats[] = calcGapContribtionsStats( normalize_for_effective_seq_length ); final List to_remove_ids = new ArrayList(); final List msa_props = new ArrayList(); for( final GapContribution gap_gontribution : stats ) { @@ -305,7 +305,7 @@ public class MsaCompactor { public final List removeViaGapAverage( final double mean_gapiness ) throws IOException, InterruptedException { - final GapContribution stats[] = calcGapContribtionsStats( _norm ); + final GapContribution stats[] = calcGapContribtionsStats( _normalize_for_effective_seq_length ); final List to_remove_ids = new ArrayList(); final List msa_props = new ArrayList(); for( final GapContribution gap_gontribution : stats ) { @@ -363,7 +363,7 @@ public class MsaCompactor { } public List removeViaLength( final int length ) throws IOException, InterruptedException { - final GapContribution stats[] = calcGapContribtionsStats( _norm ); + final GapContribution stats[] = calcGapContribtionsStats( _normalize_for_effective_seq_length ); final List to_remove_ids = new ArrayList(); final List msa_props = new ArrayList(); for( final GapContribution gap_gontribution : stats ) { @@ -421,7 +421,7 @@ public class MsaCompactor { public final List removeWorstOffenders( final int to_remove ) throws IOException, InterruptedException { - final GapContribution stats[] = calcGapContribtionsStats( _norm ); + final GapContribution stats[] = calcGapContribtionsStats( _normalize_for_effective_seq_length ); final List to_remove_ids = new ArrayList(); final List msa_props = new ArrayList(); for( int j = 0; j < to_remove; ++j ) { @@ -487,8 +487,8 @@ public class MsaCompactor { _maffts_opts = maffts_opts; } - public final void setNorm( final boolean norm ) { - _norm = norm; + public final void setNorm( final boolean normalize_for_effective_seq_length ) { + _normalize_for_effective_seq_length = normalize_for_effective_seq_length; } final public void setOutFileBase( final File out_file_base ) { @@ -584,8 +584,8 @@ public class MsaCompactor { return stats; } - final private GapContribution[] calcGapContribtionsStats( final boolean norm ) { - final GapContribution stats[] = calcGapContribtions( norm ); + final private GapContribution[] calcGapContribtionsStats( final boolean normalize_for_effective_seq_length ) { + final GapContribution stats[] = calcGapContribtions( normalize_for_effective_seq_length ); Arrays.sort( stats ); return stats; } -- 1.7.10.2