msa = DeleteableMsa.createInstance( GeneralMsaParser.parse( is ) );
}
final DescriptiveStatistics initial_msa_stats = MsaMethods.calculateEffectiveLengthStatistics( msa );
- //System.out.println( initial_msa_stats.toString() );
- if ( ( cla.isOptionSet( LENGTH_OPTION ) || cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla
- .isOptionSet( AV_GAPINESS_OPTION ) ) && ( out == null ) ) {
+ final boolean chart_only = ( !cla.isOptionSet( LENGTH_OPTION ) )
+ && ( !cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) )
+ && ( !cla.isOptionSet( AV_GAPINESS_OPTION ) ) && ( !cla.isOptionSet( MIN_LENGTH_OPTION ) );
+ if ( !chart_only && ( out == null ) ) {
ForesterUtil.fatalError( PRG_NAME, "outfile file missing" );
}
if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) {
}
}
}
- if ( ( !cla.isOptionSet( LENGTH_OPTION ) && !cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) && !cla
- .isOptionSet( AV_GAPINESS_OPTION ) ) && ( ( out != null ) || ( removed_seqs_out_base != null ) ) ) {
- ForesterUtil.fatalError( PRG_NAME,
- "chart only, no outfile(s) produced (no need to indicate output file(s))" );
+ if ( chart_only ) {
+ if ( ( out != null ) || ( removed_seqs_out_base != null ) ) {
+ ForesterUtil
+ .fatalError( PRG_NAME,
+ "chart only, no outfile(s) produced, thus no need to indicate output file(s)" );
+ }
+ if ( !realign && ( step > 1 ) ) {
+ ForesterUtil.fatalError( PRG_NAME,
+ "chart only, no re-aligning, thus no need to use step for output and re-aligning; use -"
+ + STEP_FOR_DIAGNOSTICS_OPTION + " instead" );
+ }
}
ForesterUtil.printProgramInformation( PRG_NAME,
PRG_DESC,
if ( realign ) {
System.out.println( "MAFFT options : " + mafft_options );
}
- if ( min_length > -1 ) {
+ if ( min_length > 1 ) {
System.out.println( "Minimal effective sequence length : " + min_length );
}
if ( gap_ratio > -1 ) {
//
final int initial_number_of_seqs = msa.getNumberOfSequences();
List<MsaProperties> msa_props = null;
- if ( worst_remove > 0 ) {
- final MsaCompactor mc = new MsaCompactor( msa );
- mc.setRealign( realign );
+ final MsaCompactor mc = new MsaCompactor( msa );
+ mc.setNorm( norm );
+ mc.setRealign( realign );
+ if ( realign ) {
+ mc.setPathToMafft( path_to_mafft );
+ mc.setMafftOptions( mafft_options );
+ }
+ mc.setStep( step );
+ mc.setStepForDiagnostics( step_for_diagnostics );
+ mc.setReportAlnMeanIdentity( report_aln_mean_identity );
+ if ( ( worst_remove > 0 ) || ( av_gap > 0 ) || ( length > 0 ) ) {
mc.setOutputFormat( output_format );
- if ( realign ) {
- mc.setPathToMafft( path_to_mafft );
- mc.setMafftOptions( mafft_options );
- }
- mc.setNorm( norm );
- mc.setReportAlnMeanIdentity( report_aln_mean_identity );
mc.setOutFileBase( out );
if ( removed_seqs_out_base != null ) {
mc.setRemovedSeqsOutBase( removed_seqs_out_base );
}
- mc.setStep( step );
- mc.setStepForDiagnostics( step_for_diagnostics );
+ }
+ if ( min_length > 1 ) {
+ mc.removeSequencesByMinimalLength( min_length );
+ mc.writeMsa( new File( "removed" ) );
+ }
+ if ( worst_remove > 0 ) {
msa_props = mc.removeWorstOffenders( worst_remove );
}
else if ( av_gap > 0 ) {
- final MsaCompactor mc = new MsaCompactor( msa );
- mc.setRealign( realign );
- mc.setOutputFormat( output_format );
- if ( realign ) {
- mc.setPathToMafft( path_to_mafft );
- mc.setMafftOptions( mafft_options );
- }
- mc.setNorm( norm );
- mc.setReportAlnMeanIdentity( report_aln_mean_identity );
- mc.setOutFileBase( out );
- if ( removed_seqs_out_base != null ) {
- mc.setRemovedSeqsOutBase( removed_seqs_out_base );
- }
- mc.setStep( step );
- mc.setStepForDiagnostics( step_for_diagnostics );
msa_props = mc.removeViaGapAverage( av_gap );
}
else if ( length > 0 ) {
- final MsaCompactor mc = new MsaCompactor( msa );
- mc.setRealign( realign );
- mc.setOutputFormat( output_format );
- if ( realign ) {
- mc.setPathToMafft( path_to_mafft );
- mc.setMafftOptions( mafft_options );
- }
- mc.setNorm( norm );
- mc.setReportAlnMeanIdentity( report_aln_mean_identity );
- mc.setOutFileBase( out );
- if ( removed_seqs_out_base != null ) {
- mc.setRemovedSeqsOutBase( removed_seqs_out_base );
- }
- mc.setStep( step );
- mc.setStepForDiagnostics( step_for_diagnostics );
msa_props = mc.removeViaLength( length );
}
else {
- //MsaCompactor.chart( msa, step, realign, norm, path_to_mafft );
- final MsaCompactor mc = new MsaCompactor( msa );
- mc.setRealign( realign );
- if ( realign ) {
- mc.setPathToMafft( path_to_mafft );
- mc.setMafftOptions( mafft_options );
- }
- mc.setNorm( norm );
- mc.setReportAlnMeanIdentity( report_aln_mean_identity );
- mc.setOutFileBase( out );
- mc.setStep( step );
- mc.setStepForDiagnostics( step_for_diagnostics );
msa_props = mc.chart( step, realign, norm );
}
Chart.display( msa_props, initial_number_of_seqs, report_aln_mean_identity, in.toString() );
System.out.println( " -" + STEP_OPTION + "=<integer> step for output and re-aligning (default: 1)" );
System.out.println( " -" + STEP_FOR_DIAGNOSTICS_OPTION
+ "=<integer> step for diagnostics reports (default: 1)" );
- // System.out.println( " -" + MIN_LENGTH_OPTION
- // + "=<integer> minimal effecive sequence length (for deleting of shorter sequences)" );
- // System.out.println( " -" + GAP_RATIO_LENGTH_OPTION
- // + "=<decimal> maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" );
System.out
.println( " -"
+ REPORT_ALN_MEAN_IDENTITY
System.out.println( " -" + OUTPUT_FORMAT_PHYLIP_OPTION
+ " to write output alignments in phylip format instead of fasta" );
System.out.println( " -" + OUTPUT_REMOVED_SEQS_OPTION + "=<file> to output the removed sequences" );
+ System.out.println( " -" + MIN_LENGTH_OPTION
+ + "=<integer> minimal effecive sequence length (for deleting of shorter sequences)" );
+ System.out.println( " -" + GAP_RATIO_LENGTH_OPTION
+ + "=<decimal> maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" );
System.out.println();
System.out.println();
System.out.println();
for( final GapContribution gap_gontribution : stats ) {
to_remove_ids.add( gap_gontribution.getId() );
}
- printTableHeader();
- final int x = ForesterUtil.roundToInt( _msa.getNumberOfSequences() / 20.0 );
+ final boolean print_id = ( _step < 2 ) && ( _step_for_diagnostics < 2 );
+ printTableHeader( print_id );
+ int x = ForesterUtil.roundToInt( _msa.getNumberOfSequences() / 20.0 );
+ if ( x < 1 ) {
+ x = 1;
+ }
MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity );
msa_props.add( msa_prop );
printMsaProperties( "", msa_prop );
for( final GapContribution gap_gontribution : stats ) {
to_remove_ids.add( gap_gontribution.getId() );
}
- printTableHeader();
+ final boolean print_id = ( _step < 2 ) || ( _step_for_diagnostics < 2 );
+ printTableHeader( print_id );
MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity );
msa_props.add( msa_prop );
printMsaProperties( "", msa_prop );
for( final GapContribution gap_gontribution : stats ) {
to_remove_ids.add( gap_gontribution.getId() );
}
- printTableHeader();
+ final boolean print_id = ( _step < 2 ) || ( _step_for_diagnostics < 2 );
+ printTableHeader( print_id );
MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity );
msa_props.add( msa_prop );
printMsaProperties( "", msa_prop );
return msa_props;
}
+ public final void removeSequencesByMinimalLength( final int min_effective_length ) { // replaces _msa with the result of MsaMethods.removeSequencesByMinimalLength; returns nothing, unlike removeWorstOffenders
+ printMsaProperties( "", new MsaProperties( _msa, _report_aln_mean_identity ) ); // properties of the alignment before anything is removed, reported with an empty id
+ System.out.println();
+ _msa = DeleteableMsa.createInstance( MsaMethods.removeSequencesByMinimalLength( _msa, min_effective_length ) ); // the cut-off semantics (inclusive or exclusive) are defined in MsaMethods, not here
+ removeGapColumns(); // NOTE(review): presumably drops columns left gap-only by the removal above -- confirm against removeGapColumns()
+ printMsaProperties( "", new MsaProperties( _msa, _report_aln_mean_identity ) ); // same report again, now computed on the replaced _msa
+ System.out.println();
+ }
+
public final List<MsaProperties> removeWorstOffenders( final int to_remove ) throws IOException,
InterruptedException {
final GapContribution stats[] = calcGapContribtionsStats( _norm );
to_remove_ids.add( stats[ j ].getId() );
_removed_seq_ids.add( stats[ j ].getId() );
}
- printTableHeader();
+ final boolean print_id = ( _step < 2 ) || ( _step_for_diagnostics < 2 );
+ printTableHeader( print_id );
MsaProperties msa_prop = new MsaProperties( _msa, _report_aln_mean_identity );
msa_props.add( msa_prop );
printMsaProperties( "", msa_prop );
return msa_props;
}
+ final public void deleteGapColumns( final double max_allowed_gap_ratio ) { // thin delegate: passes max_allowed_gap_ratio unchanged to _msa.deleteGapColumns
+ _msa.deleteGapColumns( max_allowed_gap_ratio ); // no range check is done here; NOTE(review): the usage text gives 0.0-1.0 for the gap ratio option -- confirm that the callee validates
+ }
+
/**
 * Stores the supplied value in {@code _gap_ratio}. The argument is assigned as-is;
 * this method performs no validation.
 *
 * @param gap_ratio the value to assign to {@code _gap_ratio}
 */
public final void setGapRatio( final double gap_ratio ) {
    this._gap_ratio = gap_ratio;
}
sb.append( msa_properties.getLength() );
sb.append( "\t" );
sb.append( NF_4.format( msa_properties.getGapRatio() ) );
- if ( _report_aln_mean_identity /*msa_properties.getAverageIdentityRatio() >= 0*/) {
+ if ( _report_aln_mean_identity ) {
sb.append( "\t" );
sb.append( NF_4.format( msa_properties.getAverageIdentityRatio() ) );
}
return msa_prop;
}
- private final void printTableHeader() {
- if ( ( _step < 2 ) || ( _step_for_diagnostics < 2 ) ) {
+ private final void printTableHeader( final boolean print_id ) {
+ if ( print_id ) {
System.out.print( ForesterUtil.pad( "Id", _longest_id_length, ' ', false ) );
System.out.print( "\t" );
}