+ /**
+  * Replaces the working alignment with the result of
+  * {@code MsaMethods.removeSequencesByMinimalLength}, calls
+  * {@code removeGapColumns()}, writes the outfile and prints summary statistics
+  * of the resulting alignment to standard output.
+  *
+  * @param min_effective_length threshold handed to
+  *        {@code MsaMethods.removeSequencesByMinimalLength}; presumably sequences
+  *        with a shorter effective length are dropped (TODO confirm)
+  * @throws IOException declared for the output step (presumably raised by
+  *         {@code writeOutfile()}; confirm)
+  */
+ public final void removeSequencesByMinimalLength( final int min_effective_length ) throws IOException {
+     _msa = DeleteableMsa.createInstance( MsaMethods.removeSequencesByMinimalLength( _msa, min_effective_length ) );
+     removeGapColumns();
+     // The outfile is written first; its return value heads the report.
+     final String outfile = writeOutfile();
+     final DescriptiveStatistics length_stats = MsaMethods.calculateEffectiveLengthStatistics( _msa );
+     System.out.println( "Output MSA : " + outfile );
+     System.out.println( " MSA length : " + _msa.getLength() );
+     System.out.println( " Number of sequences : " + _msa.getNumberOfSequences() );
+     final String median_line = " Median sequence length : " + NF_1.format( length_stats.median() );
+     System.out.println( median_line );
+     final String mean_line = " Mean sequence length : " + NF_1.format( length_stats.arithmeticMean() );
+     System.out.println( mean_line );
+     // Max and min are truncated to int for display.
+     final int longest = ( int ) length_stats.getMax();
+     System.out.println( " Max sequence length : " + longest );
+     final int shortest = ( int ) length_stats.getMin();
+     System.out.println( " Min sequence length : " + shortest );
+     final String gap_line = " Gap ratio : " + NF_4.format( MsaMethods.calcGapRatio( _msa ) );
+     System.out.println( gap_line );
+     final String entropy_line = " Normalized Shannon Entropy (entn21): "
+             + NF_4.format( MsaMethods.calcNormalizedShannonsEntropy( 21, _msa ) );
+     System.out.println( entropy_line );
+     System.out.println();
+ }
+
+ public final List<MsaProperties> removeViaGapAverage( final double mean_gapiness ) throws IOException,
+ InterruptedException {
+ final GapContribution stats[] = calcGapContribtionsStats( _norm );
+ final List<String> to_remove_ids = new ArrayList<String>();
+ final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
+ for( final GapContribution gap_gontribution : stats ) {
+ to_remove_ids.add( gap_gontribution.getId() );
+ }
+ Phylogeny phy = null;
+ if ( _phylogentic_inference ) {
+ System.out.println( "calculating phylogentic tree..." );
+ System.out.println();
+ phy = calcTree();
+ addSeqs2Tree( _msa, phy );
+ }
+ printTableHeader();
+ MsaProperties msa_prop = new MsaProperties( _msa, "", _calculate_shannon_entropy );
+ msa_props.add( msa_prop );
+ printMsaProperties( msa_prop );
+ System.out.println();
+ int i = 0;
+ while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
+ final String id = to_remove_ids.get( i );
+ _removed_seq_ids.add( id );
+ final MolecularSequence deleted = _msa.deleteRow( id, true );
+ _removed_seqs.add( deleted );
+ removeGapColumns();
+ if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
+ msa_prop = printMsaStatsWriteOutfileAndRealign( _realign, id );
+ msa_props.add( msa_prop );
+ System.out.println();
+ }
+ else if ( isPrintMsaStats( i ) ) {
+ msa_prop = new MsaProperties( _msa, id, _calculate_shannon_entropy );
+ msa_props.add( msa_prop );
+ printMsaProperties( msa_prop );
+ System.out.println();
+ }
+ ++i;
+ }
+ if ( _removed_seqs_out_base != null ) {
+ final String msg = writeAndAlignRemovedSeqs();
+ System.out.println();
+ System.out.println( msg );
+ }
+ if ( _phylogentic_inference ) {
+ decorateTree( phy, msa_props, false );
+ displayTree( phy );
+ System.out.println( "calculating phylogentic tree..." );
+ System.out.println();
+ final Phylogeny phy2 = calcTree();
+ addSeqs2Tree( _msa, phy2 );
+ displayTree( phy2 );
+ }
+
+
+ return msa_props;
+ }
+
+ public List<MsaProperties> removeViaLength( final int length ) throws IOException, InterruptedException {
+ final GapContribution stats[] = calcGapContribtionsStats( _norm );
+ final List<String> to_remove_ids = new ArrayList<String>();
+ final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
+ for( final GapContribution gap_gontribution : stats ) {
+ to_remove_ids.add( gap_gontribution.getId() );
+ }
+ Phylogeny phy = null;
+ if ( _phylogentic_inference ) {
+ System.out.println( "calculating phylogentic tree..." );
+ System.out.println();
+ phy = calcTree();
+ addSeqs2Tree( _msa, phy );
+ }
+ printTableHeader();
+ MsaProperties msa_prop = new MsaProperties( _msa, "", _calculate_shannon_entropy );
+ msa_props.add( msa_prop );
+ printMsaProperties( msa_prop );
+ System.out.println();
+ int i = 0;
+ while ( _msa.getLength() > length ) {
+ final String id = to_remove_ids.get( i );
+ _removed_seq_ids.add( id );
+ final MolecularSequence deleted = _msa.deleteRow( id, true );
+ _removed_seqs.add( deleted );
+ removeGapColumns();
+ if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( _msa.getLength() <= length ) ) {
+ msa_prop = printMsaStatsWriteOutfileAndRealign( _realign, id );
+ msa_props.add( msa_prop );
+ System.out.println();
+ }
+ else if ( isPrintMsaStats( i ) ) {
+ msa_prop = new MsaProperties( _msa, id, _calculate_shannon_entropy );
+ printMsaProperties( msa_prop );
+ msa_props.add( msa_prop );
+ System.out.println();
+ }
+ ++i;
+ }
+ if ( _removed_seqs_out_base != null ) {
+ final String msg = writeAndAlignRemovedSeqs();
+ System.out.println();
+ System.out.println( msg );
+ }
+ if ( _phylogentic_inference ) {
+ decorateTree( phy, msa_props, false );
+ displayTree( phy );
+ System.out.println( "calculating phylogentic tree..." );
+ System.out.println();
+ final Phylogeny phy2 = calcTree();
+ addSeqs2Tree( _msa, phy2 );
+ displayTree( phy2 );
+ }
+
+ return msa_props;
+ }
+
+ public final List<MsaProperties> removeWorstOffenders( final int to_remove ) throws IOException,
+ InterruptedException {
+ final GapContribution stats[] = calcGapContribtionsStats( _norm );
+ final List<String> to_remove_ids = new ArrayList<String>();
+ final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
+ for( int j = 0; j < to_remove; ++j ) {
+ to_remove_ids.add( stats[ j ].getId() );
+ }
+ Phylogeny phy = null;
+ if ( _phylogentic_inference ) {
+ System.out.println( "calculating phylogentic tree..." );
+ System.out.println();
+ phy = calcTree();
+ addSeqs2Tree( _msa, phy );
+ }
+ printTableHeader();
+ MsaProperties msa_prop = new MsaProperties( _msa, "", _calculate_shannon_entropy );
+ msa_props.add( msa_prop );
+ printMsaProperties( msa_prop );
+ System.out.println();
+ for( int i = 0; i < to_remove_ids.size(); ++i ) {
+ final String id = to_remove_ids.get( i );
+ _removed_seq_ids.add( id );
+ final MolecularSequence deleted = _msa.deleteRow( id, true );
+ _removed_seqs.add( deleted );
+ removeGapColumns();
+ if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( i == ( to_remove_ids.size() - 1 ) ) ) {
+ msa_prop = printMsaStatsWriteOutfileAndRealign( _realign, id );
+ msa_props.add( msa_prop );
+ System.out.println();
+ }
+ else if ( isPrintMsaStats( i ) ) {
+ msa_prop = new MsaProperties( _msa, id, _calculate_shannon_entropy );
+ msa_props.add( msa_prop );
+ printMsaProperties( msa_prop );
+ System.out.println();
+ }
+ }
+ if ( _removed_seqs_out_base != null ) {
+ final String msg = writeAndAlignRemovedSeqs();
+ System.out.println();
+ System.out.println( msg );
+ }
+ if ( _phylogentic_inference ) {
+ decorateTree( phy, msa_props, false );
+ displayTree( phy );
+ System.out.println( "calculating phylogentic tree..." );
+ System.out.println();
+ final Phylogeny phy2 = calcTree();
+ addSeqs2Tree( _msa, phy2 );
+ displayTree( phy2 );
+ }
+
+ return msa_props;
+ }
+
+ /**
+  * Stores the flag that this class passes to {@code MsaProperties} when it
+  * records alignment properties.
+  *
+  * @param calculate_shannon_entropy new value of the flag
+  */
+ public final void setCalculateNormalizedShannonEntropy( final boolean calculate_shannon_entropy ) {
+     this._calculate_shannon_entropy = calculate_shannon_entropy;
+ }
+
+ /**
+  * Stores the given input file name in this instance.
+  *
+  * @param infile_name the name to store
+  */
+ public void setInfileName( final String infile_name ) {
+     this._infile_name = infile_name;
+ }
+
+ /**
+  * Stores the given options string in this instance.
+  *
+  * @param maffts_opts the options string; presumably command-line options for
+  *        the MAFFT realignment step (TODO confirm against the realign code)
+  */
+ public final void setMafftOptions( final String maffts_opts ) {
+     this._maffts_opts = maffts_opts;
+ }
+
+ public final void setNorm( final boolean norm ) {
+ _norm = norm;