From 920278dc79977b8fc30b43ef06a7d207dccbe9cd Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Mon, 28 Apr 2014 19:49:11 +0000 Subject: [PATCH] inprogress --- .../org/forester/application/msa_compactor.java | 42 +++++++++++--- .../java/src/org/forester/msa/DeleteableMsa.java | 2 +- .../org/forester/msa_compactor/MsaCompactor.java | 60 ++++++++++++-------- 3 files changed, 71 insertions(+), 33 deletions(-) diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index c0b0663..d5b694f 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -126,6 +126,10 @@ public class msa_compactor { } final DescriptiveStatistics initial_msa_stats = MsaMethods.calculateEffectiveLengthStatistics( msa ); System.out.println( initial_msa_stats.toString() ); + if ( ( cla.isOptionSet( LENGTH_OPTION ) || cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla + .isOptionSet( AV_GAPINESS_OPTION ) ) && ( out == null ) ) { + ForesterUtil.fatalError( PRG_NAME, "outfile file missing" ); + } if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) { worst_remove = cla.getOptionValueAsInt( REMOVE_WORST_OFFENDERS_OPTION ); if ( ( worst_remove < 1 ) || ( worst_remove >= msa.getNumberOfSequences() - 1 ) ) { @@ -216,6 +220,11 @@ public class msa_compactor { } } } + if ( ( !cla.isOptionSet( LENGTH_OPTION ) && !cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) && !cla + .isOptionSet( AV_GAPINESS_OPTION ) ) && ( ( out != null ) || ( removed_seqs_out_base != null ) ) ) { + ForesterUtil.fatalError( PRG_NAME, + "chart only, no outfile(s) produced (no need to indicate output file(s))" ); + } ForesterUtil.printProgramInformation( PRG_NAME, PRG_DESC, PRG_VERSION, @@ -254,7 +263,6 @@ public class msa_compactor { System.out.println( "Step for diagnostics reports : " + ( step_for_diagnostics > 1 ? step_for_diagnostics : 1 ) ); System.out.println( "Calculate mean identity : " + report_aln_mean_identity ); - if ( !norm ) { System.out.println( "Normalize : " + norm ); } @@ -279,6 +287,9 @@ public class msa_compactor { } mc.setNorm( norm ); mc.setOutFileBase( out ); + if ( removed_seqs_out_base != null ) { + mc.setRemovedSeqsOutBase( removed_seqs_out_base ); + } if ( step > 1 ) { mc.setStep( step ); } @@ -295,7 +306,15 @@ public class msa_compactor { } mc.setNorm( norm ); mc.setOutFileBase( out ); - mc.setStep( step ); + if ( removed_seqs_out_base != null ) { + mc.setRemovedSeqsOutBase( removed_seqs_out_base ); + } + if ( step > 1 ) { + mc.setStep( step ); + } + if ( step_for_diagnostics > 1 ) { + mc.setStepForDiagnostics( step_for_diagnostics ); + } mc.removeViaGapAverage( av_gap ); } else if ( length > 0 ) { @@ -306,8 +325,15 @@ public class msa_compactor { mc.setPathToMafft( path_to_mafft ); } mc.setNorm( norm ); - mc.setOutFileBase( out ); - mc.setStep( step ); + if ( removed_seqs_out_base != null ) { + mc.setRemovedSeqsOutBase( removed_seqs_out_base ); + } + if ( step > 1 ) { + mc.setStep( step ); + } + if ( step_for_diagnostics > 1 ) { + mc.setStepForDiagnostics( step_for_diagnostics ); + } mc.removeViaLength( length ); } else { @@ -377,7 +403,7 @@ public class msa_compactor { } System.out.println( "Usage:" ); System.out.println(); - System.out.println( PRG_NAME + " " ); + System.out.println( PRG_NAME + " " ); System.out.println(); System.out.println( " options: " ); System.out.println(); @@ -394,8 +420,10 @@ public class msa_compactor { + "= minimal effecive sequence length (for deleting of shorter sequences)" ); System.out.println( " -" + GAP_RATIO_LENGTH_OPTION + "= maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" ); - System.out.println( " -" + REPORT_ALN_MEAN_IDENTITY - + " to calculate mean MSA column identity (\"MSA quality\") (not recommended for very large alignments)" ); + System.out + .println( " -" + + REPORT_ALN_MEAN_IDENTITY + + " to calculate mean MSA column identity (\"MSA quality\") (not recommended for very large alignments)" ); System.out.println( " -" + OUTPUT_FORMAT_PHYLIP_OPTION + " to write output alignments in phylip format instead of fasta" ); System.out.println( " -" + OUTPUT_REMOVED_SEQS_OPTION + "= to output the removed sequences" ); diff --git a/forester/java/src/org/forester/msa/DeleteableMsa.java b/forester/java/src/org/forester/msa/DeleteableMsa.java index 05bf074..a781bf6 100644 --- a/forester/java/src/org/forester/msa/DeleteableMsa.java +++ b/forester/java/src/org/forester/msa/DeleteableMsa.java @@ -60,7 +60,7 @@ public final class DeleteableMsa extends BasicMsa { } return max; } - + final public void deleteGapColumns( final double max_allowed_gap_ratio ) { if ( ( max_allowed_gap_ratio < 0 ) || ( max_allowed_gap_ratio > 1 ) ) { throw new IllegalArgumentException( "max allowed gap ration is out of range: " + max_allowed_gap_ratio ); diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index 30b66d9..374d3bb 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -41,7 +41,8 @@ import org.forester.evoinference.distance.PairwiseDistanceCalculator; import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD; import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; import org.forester.evoinference.tools.BootstrapResampler; -import org.forester.msa.BasicMsa; +import org.forester.io.writers.SequenceWriter; +import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; import org.forester.msa.DeleteableMsa; import org.forester.msa.Mafft; import org.forester.msa.Msa; @@ -57,22 +58,26 @@ import org.forester.util.ForesterUtil; public class MsaCompactor { - final private static NumberFormat NF_3 = new DecimalFormat( "#.###" ); - final private static NumberFormat NF_4 = new DecimalFormat( "#.####" ); - private double _gap_ratio = -1; + final private static NumberFormat NF_3 = new DecimalFormat( "#.###" ); + final private static NumberFormat NF_4 = new DecimalFormat( "#.####" ); + private double _gap_ratio = -1; // - private final String _maffts_opts = "--auto"; - private int _min_length = -1; + private final String _maffts_opts = "--auto"; + private int _min_length = -1; // - private DeleteableMsa _msa = null; - private boolean _norm = true; - private File _out_file_base = null; - private MSA_FORMAT _output_format = MSA_FORMAT.FASTA; - private String _path_to_mafft = null; + private DeleteableMsa _msa = null; + private boolean _norm = true; + private File _out_file_base = null; + private MSA_FORMAT _output_format = MSA_FORMAT.FASTA; + private String _path_to_mafft = null; // - private boolean _realign = false; + private boolean _realign = false; private final SortedSet _removed_seq_ids; - private final File _removed_seqs_out_base = null; + private File _removed_seqs_out_base = null; + + public final void setRemovedSeqsOutBase( final File removed_seqs_out_base ) { + _removed_seqs_out_base = removed_seqs_out_base; + } private boolean _report_aln_mean_identity = false; private int _step = -1; private int _step_for_diagnostics = -1; @@ -123,8 +128,10 @@ public class MsaCompactor { System.out.println(); ++i; } - final String msg = writeAndAlignRemovedSeqs(); - System.out.println( msg ); + if ( _removed_seqs_out_base != null ) { + final String msg = writeAndAlignRemovedSeqs(); + System.out.println( msg ); + } } public void removeViaLength( final int length ) throws IOException, InterruptedException { @@ -149,8 +156,10 @@ public class MsaCompactor { System.out.println(); ++i; } - final String msg = writeAndAlignRemovedSeqs(); - System.out.println( msg ); + if ( _removed_seqs_out_base != null ) { + final String msg = writeAndAlignRemovedSeqs(); + System.out.println( msg ); + } } public final void removeWorstOffenders( final int to_remove ) throws IOException, InterruptedException { @@ -177,8 +186,10 @@ public class MsaCompactor { System.out.println(); } } - final String msg = writeAndAlignRemovedSeqs(); - System.out.println( msg ); + if ( _removed_seqs_out_base != null ) { + final String msg = writeAndAlignRemovedSeqs(); + System.out.println( msg ); + } } public final List chart( final int step, final boolean realign, final boolean norm ) @@ -278,10 +289,9 @@ public class MsaCompactor { final public String writeAndAlignRemovedSeqs() throws IOException, InterruptedException { final StringBuilder msg = new StringBuilder(); - final Msa removed = BasicMsa.createInstance( _removed_seqs ); - final String n = _removed_seqs_out_base + "_" + removed.getNumberOfSequences() + ".fasta"; - writeMsa( removed, n, MSA_FORMAT.FASTA ); - msg.append( "wrote " + removed.getNumberOfSequences() + " removed sequences to " + n ); + final String n = _removed_seqs_out_base + "_" + _removed_seqs.size() + ".fasta"; + SequenceWriter.writeSeqs( _removed_seqs, new File( n ), SEQ_FORMAT.FASTA, 100 ); + msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + n ); if ( _realign ) { final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft ); final List opts = new ArrayList(); @@ -402,7 +412,7 @@ public class MsaCompactor { } private final void printMsaProperties( final String id, final MsaProperties msa_properties ) { - System.out.print( ForesterUtil.pad( _longest_id_length + 1, 20, ' ', false ) ); + System.out.print( ForesterUtil.pad( id, _longest_id_length, ' ', false ) ); System.out.print( "\t" ); final StringBuilder sb = msaPropertiesAsSB( msa_properties ); System.out.print( sb ); @@ -493,7 +503,7 @@ public class MsaCompactor { } private final void printTableHeader() { - System.out.print( ForesterUtil.pad( "Id", _longest_id_length + 1, ' ', false ) ); + System.out.print( ForesterUtil.pad( "Id", _longest_id_length, ' ', false ) ); System.out.print( "\t" ); System.out.print( "Seqs" ); System.out.print( "\t" ); -- 1.7.10.2