From: cmzmasek@gmail.com Date: Mon, 28 Apr 2014 23:54:15 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=0f01547b8718b78c8484b160e1c0ed4c372eb8ec;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index d5b694f..b6cc057 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -57,7 +57,7 @@ public class msa_compactor { final static private String MIN_LENGTH_OPTION = "ml"; final static private String GAP_RATIO_LENGTH_OPTION = "gr"; final static private String REPORT_ALN_MEAN_IDENTITY = "q"; - final static private String OUTPUT_FORMAT_PHYLIP_OPTION = "f"; + final static private String OUTPUT_FORMAT_PHYLIP_OPTION = "p"; final static private String OUTPUT_REMOVED_SEQS_OPTION = "ro"; final static private String MAFFT_OPTIONS = "mo"; // @@ -65,8 +65,8 @@ public class msa_compactor { final static private String DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION = "nn"; final static private String PRG_NAME = "msa_compactor"; final static private String PRG_DESC = "multiple sequence aligment compactor"; - final static private String PRG_VERSION = "0.02"; - final static private String PRG_DATE = "140316"; + final static private String PRG_VERSION = "0.2"; + final static private String PRG_DATE = "140428"; final static private String E_MAIL = "czmasek@sanfordburham.org"; final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester"; @@ -86,11 +86,11 @@ public class msa_compactor { int worst_remove = -1; double av_gap = -1; int length = -1; - int step = -1; + int step = 1; boolean realign = false; boolean norm = true; String path_to_mafft = null; - int step_for_diagnostics = -1; + int step_for_diagnostics = 1; int min_length = -1; double gap_ratio = -1; boolean report_aln_mean_identity = false; @@ -125,7 +125,7 @@ public class msa_compactor { msa = DeleteableMsa.createInstance( GeneralMsaParser.parse( is ) ); } final DescriptiveStatistics initial_msa_stats = MsaMethods.calculateEffectiveLengthStatistics( msa ); - System.out.println( initial_msa_stats.toString() ); + //System.out.println( initial_msa_stats.toString() ); if ( ( cla.isOptionSet( LENGTH_OPTION ) || cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla .isOptionSet( AV_GAPINESS_OPTION ) ) && ( out == null ) ) { ForesterUtil.fatalError( PRG_NAME, "outfile file missing" ); @@ -259,9 +259,8 @@ public class msa_compactor { System.out.println( "Output format : " + ( output_format == MSA_FORMAT.FASTA ? "fasta" : "phylip" ) ); } - System.out.println( "Step for output and re-aligning) : " + ( step > 1 ? step : 1 ) ); - System.out.println( "Step for diagnostics reports : " - + ( step_for_diagnostics > 1 ? step_for_diagnostics : 1 ) ); + System.out.println( "Step for output and re-aligning : " + step ); + System.out.println( "Step for diagnostics reports : " + step_for_diagnostics ); System.out.println( "Calculate mean identity : " + report_aln_mean_identity ); if ( !norm ) { System.out.println( "Normalize : " + norm ); @@ -279,83 +278,79 @@ public class msa_compactor { System.out.println(); // // + final int initial_number_of_seqs = msa.getNumberOfSequences(); + List msa_props = null; if ( worst_remove > 0 ) { final MsaCompactor mc = new MsaCompactor( msa ); mc.setRealign( realign ); + mc.setOutputFormat( output_format ); if ( realign ) { mc.setPathToMafft( path_to_mafft ); + mc.setMafftOptions( mafft_options ); } mc.setNorm( norm ); + mc.setReportAlnMeanIdentity( report_aln_mean_identity ); mc.setOutFileBase( out ); if ( removed_seqs_out_base != null ) { mc.setRemovedSeqsOutBase( removed_seqs_out_base ); } - if ( step > 1 ) { - mc.setStep( step ); - } - if ( step_for_diagnostics > 1 ) { - mc.setStepForDiagnostics( step_for_diagnostics ); - } - mc.removeWorstOffenders( worst_remove ); + mc.setStep( step ); + mc.setStepForDiagnostics( step_for_diagnostics ); + msa_props = mc.removeWorstOffenders( worst_remove ); } else if ( av_gap > 0 ) { final MsaCompactor mc = new MsaCompactor( msa ); mc.setRealign( realign ); + mc.setOutputFormat( output_format ); if ( realign ) { mc.setPathToMafft( path_to_mafft ); + mc.setMafftOptions( mafft_options ); } mc.setNorm( norm ); + mc.setReportAlnMeanIdentity( report_aln_mean_identity ); mc.setOutFileBase( out ); if ( removed_seqs_out_base != null ) { mc.setRemovedSeqsOutBase( removed_seqs_out_base ); } - if ( step > 1 ) { - mc.setStep( step ); - } - if ( step_for_diagnostics > 1 ) { - mc.setStepForDiagnostics( step_for_diagnostics ); - } - mc.removeViaGapAverage( av_gap ); + mc.setStep( step ); + mc.setStepForDiagnostics( step_for_diagnostics ); + msa_props = mc.removeViaGapAverage( av_gap ); } else if ( length > 0 ) { // TODO if < shortest seq -> error final MsaCompactor mc = new MsaCompactor( msa ); mc.setRealign( realign ); + mc.setOutputFormat( output_format ); if ( realign ) { mc.setPathToMafft( path_to_mafft ); + mc.setMafftOptions( mafft_options ); } mc.setNorm( norm ); + mc.setReportAlnMeanIdentity( report_aln_mean_identity ); + mc.setOutFileBase( out ); if ( removed_seqs_out_base != null ) { mc.setRemovedSeqsOutBase( removed_seqs_out_base ); } - if ( step > 1 ) { - mc.setStep( step ); - } - if ( step_for_diagnostics > 1 ) { - mc.setStepForDiagnostics( step_for_diagnostics ); - } - mc.removeViaLength( length ); + mc.setStep( step ); + mc.setStepForDiagnostics( step_for_diagnostics ); + msa_props = mc.removeViaLength( length ); } else { //MsaCompactor.chart( msa, step, realign, norm, path_to_mafft ); - final int initial_number_of_seqs = msa.getNumberOfSequences(); final MsaCompactor mc = new MsaCompactor( msa ); mc.setRealign( realign ); if ( realign ) { mc.setPathToMafft( path_to_mafft ); + mc.setMafftOptions( mafft_options ); } mc.setNorm( norm ); mc.setReportAlnMeanIdentity( report_aln_mean_identity ); mc.setOutFileBase( out ); - if ( step > 1 ) { - mc.setStep( step ); - } - if ( step_for_diagnostics > 1 ) { - mc.setStepForDiagnostics( step_for_diagnostics ); - } - final List msa_props = mc.chart( step, realign, norm ); - Chart.display( msa_props, initial_number_of_seqs ); + mc.setStep( step ); + mc.setStepForDiagnostics( step_for_diagnostics ); + msa_props = mc.chart( step, realign, norm ); } + Chart.display( msa_props, initial_number_of_seqs, report_aln_mean_identity, in.toString() ); } catch ( final IllegalArgumentException iae ) { iae.printStackTrace(); //TODO remove me @@ -411,19 +406,19 @@ public class msa_compactor { + "= number of worst offender sequences to remove" ); System.out.println( " -" + LENGTH_OPTION + "= target MSA length" ); System.out.println( " -" + AV_GAPINESS_OPTION + "= target gap-ratio (0.0-1.0)" ); - System.out.println( " -" + STEP_OPTION + "= step for output and re-aligning (default: 1)" ); System.out.println( " -" + REALIGN_OPTION + " to realign using MAFFT" + mafft_comment ); System.out.println( " -" + MAFFT_OPTIONS + "= options for MAFFT (default: --auto)" ); + System.out.println( " -" + STEP_OPTION + "= step for output and re-aligning (default: 1)" ); System.out.println( " -" + STEP_FOR_DIAGNOSTICS_OPTION + "= step for diagnostics reports (default: 1)" ); - System.out.println( " -" + MIN_LENGTH_OPTION - + "= minimal effecive sequence length (for deleting of shorter sequences)" ); - System.out.println( " -" + GAP_RATIO_LENGTH_OPTION - + "= maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" ); + // System.out.println( " -" + MIN_LENGTH_OPTION + // + "= minimal effecive sequence length (for deleting of shorter sequences)" ); + // System.out.println( " -" + GAP_RATIO_LENGTH_OPTION + // + "= maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" ); System.out .println( " -" + REPORT_ALN_MEAN_IDENTITY - + " to calculate mean MSA column identity (\"MSA quality\") (not recommended for very large alignments)" ); + + " to calculate mean MSA column identity (\"MSA quality\") (not recommended for very large alignments)" ); System.out.println( " -" + OUTPUT_FORMAT_PHYLIP_OPTION + " to write output alignments in phylip format instead of fasta" ); System.out.println( " -" + OUTPUT_REMOVED_SEQS_OPTION + "= to output the removed sequences" ); diff --git a/forester/java/src/org/forester/msa_compactor/Chart.java b/forester/java/src/org/forester/msa_compactor/Chart.java index 48e959d..5563d7c 100644 --- a/forester/java/src/org/forester/msa_compactor/Chart.java +++ b/forester/java/src/org/forester/msa_compactor/Chart.java @@ -50,12 +50,19 @@ public final class Chart extends JDialog implements ActionListener { private ChartPanel _chart_panel = null; private final JMenuItem _m_exit = new JMenuItem(); private List _msa_props; + private final boolean _show_msa_qual; private final int _initial_number_of_seqs; + private final String _title; - private Chart( final List msa_props, final int initial_number_of_seqs ) { + private Chart( final List msa_props, + final int initial_number_of_seqs, + final boolean show_msa_qual, + final String title ) { super(); _msa_props = msa_props; + _title = title; _initial_number_of_seqs = initial_number_of_seqs; + _show_msa_qual = show_msa_qual; setTitle( "msa compactor" ); setSize( 500, 400 ); setResizable( true ); @@ -108,23 +115,26 @@ public final class Chart extends JDialog implements ActionListener { seqs_gaps[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences(); seqs_gaps[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getGapRatio() * 100 ); } - model.addData( seqs_gaps, "Gaps" ); - model.setSeriesLine( "Series " + "Gaps", true ); - model.setSeriesMarker( "Series " + "Gaps", false ); - final double[][] seqs_identity = new double[ _msa_props.size() ][ 2 ]; - for( int i = 0; i < _msa_props.size(); ++i ) { - seqs_identity[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences(); - seqs_identity[ i ][ 1 ] = ForesterUtil.roundToInt( _msa_props.get( i ).getAverageIdentityRatio() * 100 ); + model.addData( seqs_gaps, "Gap ratio" ); + model.setSeriesLine( "Series " + "Gap ratio", true ); + model.setSeriesMarker( "Series " + "Gap ratio", false ); + if ( _show_msa_qual ) { + final double[][] seqs_identity = new double[ _msa_props.size() ][ 2 ]; + for( int i = 0; i < _msa_props.size(); ++i ) { + seqs_identity[ i ][ 0 ] = _initial_number_of_seqs - _msa_props.get( i ).getNumberOfSequences(); + seqs_identity[ i ][ 1 ] = ForesterUtil + .roundToInt( _msa_props.get( i ).getAverageIdentityRatio() * 100 ); + } + model.addData( seqs_identity, "mean MSA column identity" ); + model.setSeriesLine( "Series " + "mean MSA column identity", true ); + model.setSeriesMarker( "Series " + "mean MSA column identity", false ); } - model.addData( seqs_identity, "Id" ); - model.setSeriesLine( "Series " + "Id", true ); - model.setSeriesMarker( "Series " + "Id", false ); final BoxCoordSystem coord = new BoxCoordSystem( model ); coord.setUnitFont( coord.getUnitFont().deriveFont( 20.0f ) ); coord.setXAxisUnit( "Number of Sequences" ); coord.setPaintGrid( true ); coord.setYAxisUnit( "MSA Length" ); - _chart_panel = new ChartPanel( model, "msa compactor" ); + _chart_panel = new ChartPanel( model, _title ); _chart_panel.setCoordSystem( coord ); final MultiScatterChartRenderer renderer = new MultiScatterChartRenderer( coord, model ); renderer.setAllowBuffer( false ); @@ -133,14 +143,17 @@ public final class Chart extends JDialog implements ActionListener { return _chart_panel; } - public static void display( final List msa_props, final int initial_number_of_seqs ) { + public static void display( final List msa_props, + final int initial_number_of_seqs, + final boolean show_msa_qual, + final String title ) { try { UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() ); } catch ( final Exception e ) { e.printStackTrace(); } - final Chart chart = new Chart( msa_props, initial_number_of_seqs ); + final Chart chart = new Chart( msa_props, initial_number_of_seqs, show_msa_qual, title ); chart.setVisible( true ); } @@ -151,7 +164,7 @@ public final class Chart extends JDialog implements ActionListener { catch ( final Exception e ) { e.printStackTrace(); } - final Chart temp = new Chart( null, 0 ); + final Chart temp = new Chart( null, 0, true, "title" ); temp.setVisible( true ); } } diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index 3ade90f..938273d 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -62,7 +62,7 @@ public class MsaCompactor { final private static NumberFormat NF_4 = new DecimalFormat( "#.####" ); private double _gap_ratio = -1; // - private final String _maffts_opts = "--auto"; + private String _maffts_opts = "--auto"; private int _min_length = -1; // private DeleteableMsa _msa = null; @@ -103,13 +103,19 @@ public class MsaCompactor { return _removed_seq_ids; } - public final void removeViaGapAverage( final double mean_gapiness ) throws IOException, InterruptedException { + public final List removeViaGapAverage( final double mean_gapiness ) throws IOException, + InterruptedException { final GapContribution stats[] = calcGapContribtionsStats( _norm ); final List to_remove_ids = new ArrayList(); + final List msa_props = new ArrayList(); for( final GapContribution gap_gontribution : stats ) { to_remove_ids.add( gap_gontribution.getId() ); } printTableHeader(); + MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); + msa_props.add( msa_prop ); + printMsaProperties( "", msa_prop ); + System.out.println(); int i = 0; while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) { final String id = to_remove_ids.get( i ); @@ -117,30 +123,39 @@ public class MsaCompactor { final Sequence deleted = _msa.deleteRow( id, true ); _removed_seqs.add( deleted ); removeGapColumns(); - if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) - || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) { - printMsaStatsWriteOutfileAndRealign( _realign, id ); + if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) { + msa_prop = printMsaStatsWriteOutfileAndRealign( _realign, id ); + msa_props.add( msa_prop ); + System.out.println(); } - else { - final MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); + else if ( isPrintMsaStats( i ) ) { + msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); + msa_props.add( msa_prop ); printMsaProperties( id, msa_prop ); + System.out.println(); } - System.out.println(); ++i; } if ( _removed_seqs_out_base != null ) { final String msg = writeAndAlignRemovedSeqs(); + System.out.println(); System.out.println( msg ); } + return msa_props; } - public void removeViaLength( final int length ) throws IOException, InterruptedException { + public List removeViaLength( final int length ) throws IOException, InterruptedException { final GapContribution stats[] = calcGapContribtionsStats( _norm ); final List to_remove_ids = new ArrayList(); + final List msa_props = new ArrayList(); for( final GapContribution gap_gontribution : stats ) { to_remove_ids.add( gap_gontribution.getId() ); } printTableHeader(); + MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); + msa_props.add( msa_prop ); + printMsaProperties( "", msa_prop ); + System.out.println(); int i = 0; while ( _msa.getLength() > length ) { final String id = to_remove_ids.get( i ); @@ -148,28 +163,41 @@ public class MsaCompactor { final Sequence deleted = _msa.deleteRow( id, true ); _removed_seqs.add( deleted ); removeGapColumns(); - if ( ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) || ( _msa.getLength() <= length ) ) { - printMsaStatsWriteOutfileAndRealign( _realign, id ); + if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( _msa.getLength() <= length ) ) { + msa_prop = printMsaStatsWriteOutfileAndRealign( _realign, id ); + msa_props.add( msa_prop ); + System.out.println(); + } + else if ( isPrintMsaStats( i ) ) { + msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); + printMsaProperties( id, msa_prop ); + msa_props.add( msa_prop ); + System.out.println(); } - final MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); - printMsaProperties( id, msa_prop ); - System.out.println(); ++i; } if ( _removed_seqs_out_base != null ) { final String msg = writeAndAlignRemovedSeqs(); + System.out.println(); System.out.println( msg ); } + return msa_props; } - public final void removeWorstOffenders( final int to_remove ) throws IOException, InterruptedException { + public final List removeWorstOffenders( final int to_remove ) throws IOException, + InterruptedException { final GapContribution stats[] = calcGapContribtionsStats( _norm ); final List to_remove_ids = new ArrayList(); + final List msa_props = new ArrayList(); for( int j = 0; j < to_remove; ++j ) { to_remove_ids.add( stats[ j ].getId() ); _removed_seq_ids.add( stats[ j ].getId() ); } printTableHeader(); + MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); + msa_props.add( msa_prop ); + printMsaProperties( "", msa_prop ); + System.out.println(); for( int i = 0; i < to_remove_ids.size(); ++i ) { final String id = to_remove_ids.get( i ); _removed_seq_ids.add( id ); @@ -177,19 +205,23 @@ public class MsaCompactor { _removed_seqs.add( deleted ); removeGapColumns(); if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( i == ( to_remove_ids.size() - 1 ) ) ) { - printMsaStatsWriteOutfileAndRealign( _realign, id ); + msa_prop = printMsaStatsWriteOutfileAndRealign( _realign, id ); + msa_props.add( msa_prop ); System.out.println(); } else if ( isPrintMsaStats( i ) ) { - final MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); + msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); + msa_props.add( msa_prop ); printMsaProperties( id, msa_prop ); System.out.println(); } } if ( _removed_seqs_out_base != null ) { final String msg = writeAndAlignRemovedSeqs(); + System.out.println(); System.out.println( msg ); } + return msa_props; } public final List chart( final int step, final boolean realign, final boolean norm ) @@ -201,12 +233,12 @@ public class MsaCompactor { to_remove_ids.add( gap_gontribution.getId() ); } printTableHeader(); - int i = 0; final int x = ForesterUtil.roundToInt( _msa.getNumberOfSequences() / 20.0 ); MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity ); msa_props.add( msa_prop ); printMsaProperties( "", msa_prop ); System.out.println(); + int i = 0; while ( _msa.getNumberOfSequences() > x ) { final String id = to_remove_ids.get( i ); _msa.deleteRow( id, false ); @@ -232,11 +264,11 @@ public class MsaCompactor { } private final boolean isPrintMsaStats( final int i ) { - return ( ( _step_for_diagnostics < 2 ) || ( ( ( i + 1 ) % _step_for_diagnostics ) == 0 ) ); + return ( ( ( _step < 2 ) && ( _step_for_diagnostics < 2 ) ) || ( ( _step_for_diagnostics > 0 ) && ( ( ( i + 1 ) % _step_for_diagnostics ) == 0 ) ) ); } private final boolean isPrintMsaStatsWriteOutfileAndRealign( final int i ) { - return ( ( _step < 2 ) || ( ( ( i + 1 ) % _step ) == 0 ) ); + return ( ( ( _step < 2 ) && ( _step_for_diagnostics < 2 ) ) || ( ( _step > 0 ) && ( ( ( i + 1 ) % _step ) == 0 ) ) ); } public final void setGapRatio( final double gap_ratio ) { @@ -279,11 +311,11 @@ public class MsaCompactor { _report_aln_mean_identity = report_aln_mean_identity; } - final public String writeMsa( final File outfile, final MSA_FORMAT format, final String suffix ) throws IOException { + final public String writeMsa( final File outfile ) throws IOException { final Double gr = MsaMethods.calcGapRatio( _msa ); final String s = outfile + "_" + _msa.getNumberOfSequences() + "_" + _msa.getLength() + "_" + ForesterUtil.roundToInt( gr * 100 ); - writeMsa( _msa, s + suffix, format ); + writeMsa( _msa, s + obtainSuffix(), _output_format ); return s; } @@ -291,7 +323,7 @@ public class MsaCompactor { final StringBuilder msg = new StringBuilder(); final String n = _removed_seqs_out_base + "_" + _removed_seqs.size() + ".fasta"; SequenceWriter.writeSeqs( _removed_seqs, new File( n ), SEQ_FORMAT.FASTA, 100 ); - msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + " to \"" + n + "\"" ); + msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + "\"" + n + "\"" ); if ( _realign ) { final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft ); final List opts = new ArrayList(); @@ -302,13 +334,7 @@ public class MsaCompactor { final Double gr = MsaMethods.calcGapRatio( removed_msa ); String s = _removed_seqs_out_base + "_" + removed_msa.getNumberOfSequences() + "_" + removed_msa.getLength() + "_" + ForesterUtil.roundToInt( gr * 100 ); - String suffix = ""; - if ( _output_format == MSA_FORMAT.FASTA ) { - suffix = ".fasta"; - } - else if ( _output_format == MSA_FORMAT.PHYLIP ) { - suffix = ".aln"; - } + final String suffix = obtainSuffix(); s += suffix; writeMsa( removed_msa, s, _output_format ); msg.append( ", and as MSA of length " + removed_msa.getLength() + " to \"" + s + "\"" ); @@ -316,6 +342,16 @@ public class MsaCompactor { return msg.toString(); } + private String obtainSuffix() { + if ( _output_format == MSA_FORMAT.FASTA ) { + return ".fasta"; + } + else if ( _output_format == MSA_FORMAT.PHYLIP ) { + return ".aln"; + } + return ""; + } + final int calcNonGapResidues( final Sequence seq ) { int ng = 0; for( int i = 0; i < seq.getLength(); ++i ) { @@ -412,10 +448,11 @@ public class MsaCompactor { } private final void printMsaProperties( final String id, final MsaProperties msa_properties ) { - System.out.print( ForesterUtil.pad( id, _longest_id_length, ' ', false ) ); - System.out.print( "\t" ); - final StringBuilder sb = msaPropertiesAsSB( msa_properties ); - System.out.print( sb ); + if ( ( _step < 2 ) || ( _step_for_diagnostics < 2 ) ) { + System.out.print( ForesterUtil.pad( id, _longest_id_length, ' ', false ) ); + System.out.print( "\t" ); + } + System.out.print( msaPropertiesAsSB( msa_properties ) ); System.out.print( "\t" ); } @@ -433,7 +470,7 @@ public class MsaCompactor { return sb; } - final private void printMsaStatsWriteOutfileAndRealign( final boolean realign, final String id ) + final private MsaProperties printMsaStatsWriteOutfileAndRealign( final boolean realign, final String id ) throws IOException, InterruptedException { if ( realign ) { realignWithMafft(); @@ -442,6 +479,7 @@ public class MsaCompactor { printMsaProperties( id, msa_prop ); final String s = writeOutfile(); System.out.print( "-> " + s + ( realign ? "\t(realigned)" : "" ) ); + return msa_prop; } final private void realignWithMafft() throws IOException, InterruptedException { @@ -453,6 +491,10 @@ public class MsaCompactor { _msa = DeleteableMsa.createInstance( mafft.infer( _msa.asSequenceList(), opts ) ); } + public final void setMafftOptions( final String maffts_opts ) { + _maffts_opts = maffts_opts; + } + final private void removeGapColumns() { _msa.deleteGapOnlyColumns(); } @@ -465,8 +507,7 @@ public class MsaCompactor { } private final String writeOutfile() throws IOException { - final String s = writeMsa( _out_file_base, MSA_FORMAT.PHYLIP, ".aln" ); - //writeMsa( _out_file_base, MSA_FORMAT.FASTA, ".fasta" ); + final String s = writeMsa( _out_file_base ); return s; } @@ -503,8 +544,10 @@ public class MsaCompactor { } private final void printTableHeader() { - System.out.print( ForesterUtil.pad( "Id", _longest_id_length, ' ', false ) ); - System.out.print( "\t" ); + if ( ( _step < 2 ) || ( _step_for_diagnostics < 2 ) ) { + System.out.print( ForesterUtil.pad( "Id", _longest_id_length, ' ', false ) ); + System.out.print( "\t" ); + } System.out.print( "Seqs" ); System.out.print( "\t" ); System.out.print( "Length" );