+ /**
+  * Replaces the current alignment with the result of
+  * MsaMethods.removeSequencesByMinimalLength( _msa, min_effective_length )
+  * and then calls removeGapColumns(). The alignment properties are printed
+  * (each followed by an empty line) once before and once after the change.
+  *
+  * @param min_effective_length
+  *            threshold passed through to MsaMethods.removeSequencesByMinimalLength
+  */
+ public final void removeSequencesByMinimalLength( final int min_effective_length ) {
+     final MsaProperties before = new MsaProperties( _msa, "", _report_aln_mean_identity );
+     printMsaProperties( "", before );
+     System.out.println();
+     _msa = DeleteableMsa.createInstance( MsaMethods.removeSequencesByMinimalLength( _msa, min_effective_length ) );
+     removeGapColumns();
+     final MsaProperties after = new MsaProperties( _msa, "", _report_aln_mean_identity );
+     printMsaProperties( "", after );
+     System.out.println();
+ }
+
+ /**
+  * Deletes rows from the alignment, one per iteration and in the order
+  * delivered by calcGapContribtionsStats( _norm ), for as long as
+  * MsaMethods.calcGapRatio( _msa ) is greater than mean_gapiness.
+  * After every deletion removeGapColumns() is called. Each deleted id and
+  * sequence is recorded in _removed_seq_ids and _removed_seqs.
+  *
+  * @param mean_gapiness
+  *            gap ratio at (or below) which removal stops
+  * @return the properties collected for the initial alignment and for every
+  *         reported intermediate/final alignment
+  * @throws IOException
+  *             propagated from the write/realign helpers
+  * @throws InterruptedException
+  *             propagated from the write/realign helpers
+  */
+ public final List<MsaProperties> removeViaGapAverage( final double mean_gapiness ) throws IOException,
+         InterruptedException {
+     final GapContribution[] contributions = calcGapContribtionsStats( _norm );
+     final List<String> candidate_ids = new ArrayList<String>();
+     for( int k = 0; k < contributions.length; ++k ) {
+         candidate_ids.add( contributions[ k ].getId() );
+     }
+     final List<MsaProperties> collected = new ArrayList<MsaProperties>();
+     printTableHeader();
+     final MsaProperties initial = new MsaProperties( _msa, "", _report_aln_mean_identity );
+     collected.add( initial );
+     printMsaProperties( "", initial );
+     System.out.println();
+     // NOTE(review): i is never checked against candidate_ids.size(); if the gap
+     // ratio never drops to mean_gapiness, get( i ) would eventually throw
+     // IndexOutOfBoundsException -- confirm whether callers can trigger this.
+     for( int i = 0; MsaMethods.calcGapRatio( _msa ) > mean_gapiness; ++i ) {
+         final String id = candidate_ids.get( i );
+         _removed_seq_ids.add( id );
+         _removed_seqs.add( _msa.deleteRow( id, true ) );
+         removeGapColumns();
+         // Full report (outfile + optional realign) on scheduled steps and
+         // once the target gap ratio has been reached.
+         if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
+             collected.add( printMsaStatsWriteOutfileAndRealign( _realign, id ) );
+             System.out.println();
+         }
+         else if ( isPrintMsaStats( i ) ) {
+             final MsaProperties snapshot = new MsaProperties( _msa, id, _report_aln_mean_identity );
+             collected.add( snapshot );
+             printMsaProperties( id, snapshot );
+             System.out.println();
+         }
+     }
+     if ( _removed_seqs_out_base != null ) {
+         final String summary = writeAndAlignRemovedSeqs();
+         System.out.println();
+         System.out.println( summary );
+     }
+     return collected;
+ }
+
+ /**
+  * Deletes rows from the alignment, one per iteration and in the order
+  * delivered by calcGapContribtionsStats( _norm ), for as long as
+  * _msa.getLength() is greater than the requested length. After every
+  * deletion removeGapColumns() is called. Each deleted id and sequence is
+  * recorded in _removed_seq_ids and _removed_seqs.
+  *
+  * @param length
+  *            alignment length at (or below) which removal stops
+  * @return the properties collected for the initial alignment and for every
+  *         reported intermediate/final alignment
+  * @throws IOException
+  *             propagated from the write/realign helpers
+  * @throws InterruptedException
+  *             propagated from the write/realign helpers
+  */
+ public List<MsaProperties> removeViaLength( final int length ) throws IOException, InterruptedException {
+     final GapContribution[] contributions = calcGapContribtionsStats( _norm );
+     final List<String> candidate_ids = new ArrayList<String>();
+     for( int k = 0; k < contributions.length; ++k ) {
+         candidate_ids.add( contributions[ k ].getId() );
+     }
+     final List<MsaProperties> collected = new ArrayList<MsaProperties>();
+     printTableHeader();
+     final MsaProperties initial = new MsaProperties( _msa, "", _report_aln_mean_identity );
+     collected.add( initial );
+     printMsaProperties( "", initial );
+     System.out.println();
+     // NOTE(review): i is never checked against candidate_ids.size(); if the
+     // alignment never shrinks to the requested length, get( i ) would eventually
+     // throw IndexOutOfBoundsException -- confirm whether callers can trigger this.
+     for( int i = 0; _msa.getLength() > length; ++i ) {
+         final String id = candidate_ids.get( i );
+         _removed_seq_ids.add( id );
+         _removed_seqs.add( _msa.deleteRow( id, true ) );
+         removeGapColumns();
+         // Full report (outfile + optional realign) on scheduled steps and
+         // once the target length has been reached.
+         if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( _msa.getLength() <= length ) ) {
+             collected.add( printMsaStatsWriteOutfileAndRealign( _realign, id ) );
+             System.out.println();
+         }
+         else if ( isPrintMsaStats( i ) ) {
+             final MsaProperties snapshot = new MsaProperties( _msa, id, _report_aln_mean_identity );
+             printMsaProperties( id, snapshot );
+             collected.add( snapshot );
+             System.out.println();
+         }
+     }
+     if ( _removed_seqs_out_base != null ) {
+         final String summary = writeAndAlignRemovedSeqs();
+         System.out.println();
+         System.out.println( summary );
+     }
+     return collected;
+ }
+
+ /**
+  * Deletes the first to_remove rows, in the order delivered by
+  * calcGapContribtionsStats( _norm ), from the alignment. After every
+  * deletion removeGapColumns() is called. Each deleted id and sequence is
+  * recorded in _removed_seq_ids and _removed_seqs.
+  *
+  * @param to_remove
+  *            number of sequences to delete; values of zero or less delete nothing
+  * @return the properties collected for the initial alignment and for every
+  *         reported intermediate/final alignment
+  * @throws ArrayIndexOutOfBoundsException
+  *             if to_remove exceeds the number of entries returned by
+  *             calcGapContribtionsStats; nothing has been deleted or recorded
+  *             at that point
+  * @throws IOException
+  *             propagated from the write/realign helpers
+  * @throws InterruptedException
+  *             propagated from the write/realign helpers
+  */
+ public final List<MsaProperties> removeWorstOffenders( final int to_remove ) throws IOException,
+         InterruptedException {
+     final GapContribution stats[] = calcGapContribtionsStats( _norm );
+     final List<String> to_remove_ids = new ArrayList<String>();
+     final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
+     // Only collect the ids here. Each id is recorded in _removed_seq_ids when
+     // its row is actually deleted (below), so it is registered exactly once and
+     // never for a sequence that was not removed.
+     for( int j = 0; j < to_remove; ++j ) {
+         to_remove_ids.add( stats[ j ].getId() );
+     }
+     printTableHeader();
+     MsaProperties msa_prop = new MsaProperties( _msa, "", _report_aln_mean_identity );
+     msa_props.add( msa_prop );
+     printMsaProperties( "", msa_prop );
+     System.out.println();
+     final int last = to_remove_ids.size() - 1;
+     for( int i = 0; i < to_remove_ids.size(); ++i ) {
+         final String id = to_remove_ids.get( i );
+         _removed_seq_ids.add( id );
+         final Sequence deleted = _msa.deleteRow( id, true );
+         _removed_seqs.add( deleted );
+         removeGapColumns();
+         // Full report (outfile + optional realign) on scheduled steps and
+         // always after the final deletion.
+         if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( i == last ) ) {
+             msa_prop = printMsaStatsWriteOutfileAndRealign( _realign, id );
+             msa_props.add( msa_prop );
+             System.out.println();
+         }
+         else if ( isPrintMsaStats( i ) ) {
+             msa_prop = new MsaProperties( _msa, id, _report_aln_mean_identity );
+             msa_props.add( msa_prop );
+             printMsaProperties( id, msa_prop );
+             System.out.println();
+         }
+     }
+     if ( _removed_seqs_out_base != null ) {
+         final String msg = writeAndAlignRemovedSeqs();
+         System.out.println();
+         System.out.println( msg );
+     }
+     return msa_props;
+ }
+
+ /**
+  * Stores the given value in the _gap_ratio field.
+  *
+  * @param gap_ratio
+  *            the new gap ratio setting
+  */
+ public final void setGapRatio( final double gap_ratio ) {
+     this._gap_ratio = gap_ratio;
+ }
+
+ /**
+  * Stores the option string that is later split on whitespace and handed to
+  * MAFFT when removed sequences are realigned.
+  *
+  * @param maffts_opts
+  *            the MAFFT options as a single string
+  */
+ public final void setMafftOptions( final String maffts_opts ) {
+     this._maffts_opts = maffts_opts;
+ }
+
+ /**
+  * Stores the given value in the _min_length field.
+  *
+  * @param min_length
+  *            the new minimal length setting
+  */
+ public final void setMinLength( final int min_length ) {
+     this._min_length = min_length;
+ }
+
+ /**
+  * Stores the flag that the removal methods pass to calcGapContribtionsStats.
+  *
+  * @param norm
+  *            the new value of the _norm flag
+  */
+ public final void setNorm( final boolean norm ) {
+     this._norm = norm;
+ }
+
+ /**
+  * Stores the given file in the _out_file_base field.
+  *
+  * @param out_file_base
+  *            the new output file base
+  */
+ final public void setOutFileBase( final File out_file_base ) {
+     this._out_file_base = out_file_base;
+ }
+
+ /**
+  * Stores the format that is passed to writeMsa when an alignment is written.
+  *
+  * @param output_format
+  *            the new output format
+  */
+ public final void setOutputFormat( final MSA_FORMAT output_format ) {
+     this._output_format = output_format;
+ }
+
+ /**
+  * Stores the path that is handed to Mafft.createInstance when removed
+  * sequences are realigned.
+  *
+  * @param path_to_mafft
+  *            the new path to the MAFFT program
+  */
+ public void setPathToMafft( final String path_to_mafft ) {
+     this._path_to_mafft = path_to_mafft;
+ }
+
+ /**
+  * Stores the flag that controls whether removed sequences are realigned and
+  * that is passed to printMsaStatsWriteOutfileAndRealign.
+  *
+  * @param realign
+  *            the new value of the _realign flag
+  */
+ public final void setRealign( final boolean realign ) {
+     this._realign = realign;
+ }
+
+ /**
+  * Stores the base used to build the output names for removed sequences.
+  * While this is null, the removal methods do not write removed sequences.
+  *
+  * @param removed_seqs_out_base
+  *            the new output base, may be null
+  */
+ public final void setRemovedSeqsOutBase( final File removed_seqs_out_base ) {
+     this._removed_seqs_out_base = removed_seqs_out_base;
+ }
+
+ /**
+  * Stores the flag that is passed to every MsaProperties constructed by the
+  * removal methods.
+  *
+  * @param report_aln_mean_identity
+  *            the new value of the _report_aln_mean_identity flag
+  */
+ public final void setReportAlnMeanIdentity( final boolean report_aln_mean_identity ) {
+     this._report_aln_mean_identity = report_aln_mean_identity;
+ }
+
+ /**
+  * Stores the given value in the _step field.
+  *
+  * @param step
+  *            the new step setting
+  */
+ public final void setStep( final int step ) {
+     this._step = step;
+ }
+
+ /**
+  * Stores the given value in the _step_for_diagnostics field.
+  *
+  * @param step_for_diagnostics
+  *            the new step-for-diagnostics setting
+  */
+ public final void setStepForDiagnostics( final int step_for_diagnostics ) {
+     this._step_for_diagnostics = step_for_diagnostics;
+ }
+
+ /**
+  * Writes all sequences in _removed_seqs as FASTA to
+  * "&lt;_removed_seqs_out_base&gt;_&lt;count&gt;.fasta". If _realign is set, the removed
+  * sequences are additionally aligned with MAFFT (using _path_to_mafft and
+  * the whitespace-separated options in _maffts_opts) and the resulting
+  * alignment is written via writeMsa in _output_format.
+  *
+  * @return a human-readable summary of what was written
+  * @throws IOException
+  *             propagated from writing the sequences/alignment or from MAFFT
+  * @throws InterruptedException
+  *             propagated from the MAFFT invocation
+  */
+ final public String writeAndAlignRemovedSeqs() throws IOException, InterruptedException {
+     final StringBuilder msg = new StringBuilder();
+     final String n = _removed_seqs_out_base + "_" + _removed_seqs.size() + ".fasta";
+     SequenceWriter.writeSeqs( _removed_seqs, new File( n ), SEQ_FORMAT.FASTA, 100 );
+     msg.append( "wrote " + _removed_seqs.size() + " removed sequences to " + "\"" + n + "\"" );
+     if ( _realign ) {
+         final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft );
+         final List<String> opts = new ArrayList<String>();
+         // Split on runs of whitespace and skip empty tokens: leading or repeated
+         // blanks in the option string must not become empty MAFFT arguments.
+         for( final String o : _maffts_opts.trim().split( "\\s+" ) ) {
+             if ( o.length() > 0 ) {
+                 opts.add( o );
+             }
+         }
+         final Msa removed_msa = mafft.infer( _removed_seqs, opts );
+         final double gr = MsaMethods.calcGapRatio( removed_msa );
+         // Output name: base _ number of sequences _ alignment length _ gap ratio in percent.
+         String s = _removed_seqs_out_base + "_" + removed_msa.getNumberOfSequences() + "_"
+                 + removed_msa.getLength() + "_" + ForesterUtil.roundToInt( gr * 100 );
+         final String suffix = obtainSuffix();
+         s += suffix;
+         writeMsa( removed_msa, s, _output_format );
+         msg.append( ", and as MSA of length " + removed_msa.getLength() + " to \"" + s + "\"" );
+     }
+     return msg.toString();
+ }
+
+ final public String writeMsa( final File outfile ) throws IOException {