+ if ( cla.isOptionSet( MAFFT_OPTIONS ) ) {
+ mafft_options = cla.getOptionValueAsCleanString( MAFFT_OPTIONS );
+ if ( ForesterUtil.isEmpty( mafft_options ) || ( mafft_options.length() < 3 ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "illegal or empty MAFFT options: " + mafft_options );
+ }
+ }
+ }
+ else if ( cla.isOptionSet( MAFFT_OPTIONS ) ) {
+ ForesterUtil.fatalError( PRG_NAME, "no need to indicate MAFFT options without realigning" );
+ }
+ if ( cla.isOptionSet( PERFORM_PHYLOGENETIC_INFERENCE ) ) { // tree inference is switched on only when this option is set
+ perform_phylogenetic_inference = true;
+ }
+ if ( chart_only ) { // chart-only mode: complain about options that only make sense when files are written
+ if ( ( out != null ) || ( removed_seqs_out_base != null ) ) { // an output file or a removed-sequences base was supplied
+ ForesterUtil
+ .fatalError( PRG_NAME, // NOTE(review): presumably fatalError reports and terminates the program - confirm
+ "chart only, no outfile(s) produced, thus no need to indicate output file(s)" );
+ }
+ if ( !realign && cla.isOptionSet( STEP_OPTION ) ) { // STEP_OPTION given although nothing is re-aligned
+ ForesterUtil.fatalError( PRG_NAME,
+ "chart only, no re-aligning, thus no need to use step for output and re-aligning; use -"
+ + STEP_FOR_DIAGNOSTICS_OPTION + " instead" );
+ }
+ }
+ if ( perform_phylogenetic_inference ) { // tree calculation is accepted only with a diagnostics step of exactly 1
+ if ( step_for_diagnostics != 1 ) {
+ ForesterUtil.fatalError( PRG_NAME,
+ "step for diagnostics reports needs to be set to 1 for tree calculation" );
+ }
+ }
+ ForesterUtil.printProgramInformation( PRG_NAME, // print the program banner, then a summary of the input MSA and the settings in effect
+ PRG_DESC,
+ PRG_VERSION,
+ PRG_DATE,
+ E_MAIL,
+ WWW,
+ ForesterUtil.getForesterLibraryInformation() );
+ System.out.println( "Input MSA : " + in );
+ System.out.println( " MSA length : " + msa.getLength() );
+ System.out.println( " Number of sequences : " + msa.getNumberOfSequences() );
+ System.out.println( " Median sequence length : " + NF_1.format( initial_msa_stats.median() ) ); // median and mean go through the NF_1 formatter
+ System.out.println( " Mean sequence length : "
+ + NF_1.format( initial_msa_stats.arithmeticMean() ) );
+ System.out.println( " Max sequence length : " + ( ( int ) initial_msa_stats.getMax() ) ); // max and min are cast to int for display
+ System.out.println( " Min sequence length : " + ( ( int ) initial_msa_stats.getMin() ) );
+ if ( !chart_only ) { // the output file is shown only when not in chart-only mode
+ System.out.println( "Output : " + out );
+ }
+ else {
+ System.out.println( "Output : n/a" );
+ }
+ if ( removed_seqs_out_base != null ) { // the following settings are reported only when they pass the respective check
+ System.out.println( "Write removed sequences to : " + removed_seqs_out_base );
+ }
+ if ( worst_remove > 0 ) { // presumably a value <= 0 means "not requested" - TODO confirm against the option parsing
+ System.out.println( "Number of worst offenders to remove : " + worst_remove );
+ }
+ if ( av_gap > 0 ) {
+ System.out.println( "Target gap-ratio : " + av_gap );
+ }
+ if ( length > 0 ) {
+ System.out.println( "Target MSA length : " + length );
+ }
+ if ( min_length > 1 ) { // values <= 1 are not reported
+ System.out.println( "Minimal effective sequence length : " + min_length );
+ }
+ if ( gap_ratio > -1 ) { // values <= -1 are not reported (presumably -1 marks "not set" - confirm)
+ System.out.println( "Maximum allowed gap ratio per column : " + gap_ratio );
+ }
+ if ( ( out != null ) || ( removed_seqs_out_base != null ) ) { // format is reported only if some output target was given
+ System.out.println( "Output format : "
+ + ( output_format == MSA_FORMAT.FASTA ? "fasta" : "phylip" ) ); // anything other than FASTA is labelled "phylip"
+ }
+ if ( chart_only && !realign ) { // charting without re-aligning: the step setting is reported as n/a
+ System.out.println( "Step for output and re-aligning : n/a" );
+ }
+ else {
+ if ( chart_only ) { // chart-only with re-aligning: label mentions re-aligning only
+ System.out.println( "Step for re-aligning : " + step );
+ }
+ else {
+ System.out.println( "Step for output and re-aligning : " + step );
+ }
+ }
+ System.out.println( "Step for diagnostics reports : " + step_for_diagnostics );
+ System.out.println( "Calculate normalized Shannon Entropy : " + report_entropy );
+ if ( !norm ) { // NOTE(review): printed only when norm is false, so this line can only ever show "false" - confirm intended
+ System.out.println( "Normalize : " + norm );
+ }
+ System.out.println( "Realign with MAFFT : " + realign );
+ if ( realign ) { // MAFFT options are shown only when re-aligning
+ System.out.println( "MAFFT options : " + mafft_options );
+ }
+ System.out.println( "Simple tree (Kimura distances, NJ) : " + perform_phylogenetic_inference );
+ System.out.println(); // blank line closes the settings report
+ final int initial_number_of_seqs = msa.getNumberOfSequences(); // sequence count of msa at this point, taken before the compactor is created
+ List<MsaProperties> msa_props = null; // starts as null; not assigned within the statements visible here
+ final MsaCompactor mc = new MsaCompactor( msa ); // build the compactor for this MSA and hand the parsed settings over to it
+ mc.setInfileName( in.getName() ); // only the file name of the input, not its full path
+ mc.setNorm( norm );
+ mc.setRealign( realign );
+ if ( realign ) { // MAFFT path and options are passed on only when re-aligning
+ mc.setPathToMafft( path_to_mafft );
+ mc.setMafftOptions( mafft_options );
+ }
+ mc.setStep( step );
+ mc.setStepForDiagnostics( step_for_diagnostics );
+ mc.setCalculateNormalizedShannonEntropy( report_entropy );
+ mc.setPeformPhylogenticInference( perform_phylogenetic_inference ); // setter name is spelled this way (sic) at the call site; it is declared outside this view
+ if ( ( worst_remove > 0 ) || ( av_gap > 0 ) || ( length > 0 ) ) { // output settings are handed over only when one of these three values is > 0
+ mc.setOutputFormat( output_format );
+ mc.setOutFileBase( out );
+ if ( removed_seqs_out_base != null ) { // optional: only when a removed-sequences base was given
+ mc.setRemovedSeqsOutBase( removed_seqs_out_base );
+ }
+ }
+ if ( min_length > 1 ) {
+ mc.removeSequencesByMinimalLength( min_length );
+ mc.writeMsa( new File( "removed" ) );