import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.tools.BootstrapResampler;
import org.forester.msa.BasicMsa;
+import org.forester.msa.DeleteableMsa;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
import org.forester.msa.Msa.MSA_FORMAT;
final private static NumberFormat NF_3 = new DecimalFormat( "#.###" );
final private static NumberFormat NF_4 = new DecimalFormat( "#.####" );
- private final String _maffts_opts = "--retree 1";
- private Msa _msa;
+ // private final String _maffts_opts = "--retree 1";
+ private final String _maffts_opts = "--auto";
+ private DeleteableMsa _msa;
private File _out_file_base;
private String _path_to_mafft;
private final SortedSet<String> _removed_seq_ids;
NF_3.setRoundingMode( RoundingMode.HALF_UP );
}
+ // Private constructor: keeps a reference to the given alignment in _msa (no copy
+ // is made here) and initializes _removed_seq_ids as an empty TreeSet.
- private MsaCompactor( final Msa msa ) {
+ private MsaCompactor( final DeleteableMsa msa ) {
_msa = msa;
_removed_seq_ids = new TreeSet<String>();
}
final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, true, matrix );
final int seed = 15;
final int n = 100;
- final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
+ final ResampleableMsa resampleable_msa = new ResampleableMsa( _msa );
final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa.getLength(),
n,
seed );
return gappiness;
}
+    /**
+     * Deletes sequences from _msa one at a time, in the order in which
+     * calcGapContribtionsStats( norm ) returns them, until the number of
+     * remaining sequences is no longer greater than one twentieth (rounded)
+     * of the starting count. A MsaProperties snapshot is recorded after a
+     * deletion whenever the starting alignment had fewer than 500 sequences
+     * or the deletion falls on a multiple of step.
+     *
+     * @param step    snapshot/realign interval; values <= 0 disable the interval
+     * @param realign if true, realign with MAFFT on every step-th deletion
+     * @param norm    passed through to calcGapContribtionsStats
+     * @param verbose if true, print a table header and one line per snapshot
+     * @return the recorded snapshots, in the order they were taken
+     */
+    final private List<MsaProperties> chart( final int step,
+                                             final boolean realign,
+                                             final boolean norm,
+                                             final boolean verbose ) throws IOException, InterruptedException {
+        final GapContribution[] contributions = calcGapContribtionsStats( norm );
+        final List<String> ids_in_removal_order = new ArrayList<String>();
+        final List<MsaProperties> snapshots = new ArrayList<MsaProperties>();
+        for( int k = 0; k < contributions.length; ++k ) {
+            ids_in_removal_order.add( contributions[ k ].getId() );
+        }
+        if ( verbose ) {
+            printTableHeader();
+        }
+        final int initial_count = _msa.getNumberOfSequences();
+        final int target_count = ForesterUtil.roundToInt( initial_count / 20.0 );
+        for( int removed = 0; _msa.getNumberOfSequences() > target_count; ++removed ) {
+            final String id = ids_in_removal_order.get( removed );
+            // Row is deleted directly on _msa (this replaces the former
+            // "_msa = MsaMethods.removeSequence( _msa, id )" reassignment).
+            _msa.deleteRow( id );
+            final boolean at_step = ( step > 0 ) && ( ( ( removed + 1 ) % step ) == 0 );
+            if ( ( initial_count >= 500 ) && !at_step ) {
+                // Large alignment and not on a step boundary: nothing to record.
+                continue;
+            }
+            removeGapColumns();
+            final boolean realigned = realign && at_step;
+            if ( realigned ) {
+                realignWithMafft();
+            }
+            snapshots.add( new MsaProperties( _msa ) );
+            if ( verbose ) {
+                printMsaStats( id );
+                if ( realigned ) {
+                    System.out.print( "(realigned)" );
+                }
+                System.out.println();
+            }
+        }
+        return snapshots;
+    }
+
private Phylogeny inferNJphylogeny( final PWD_DISTANCE_METHOD pwd_distance_method,
final Msa msa,
final boolean write_matrix,
System.out.print( "\t" );
}
+    /**
+     * Shared tail of the removal loops: optionally realigns with MAFFT, then
+     * always writes the current alignment via writeOutfile(). In verbose mode
+     * the MSA statistics line for id is printed before the write, followed by
+     * "-> " plus the value returned by writeOutfile() and, if a realignment
+     * took place, a tab and "(realigned)". No trailing newline is printed here.
+     *
+     * @param realign if true, call realignWithMafft() first
+     * @param verbose if true, print statistics and the write result to stdout
+     * @param id      sequence id handed to printMsaStats
+     */
+    final private void printMsaStatsWriteOutfileAndRealign( final boolean realign,
+                                                            final boolean verbose,
+                                                            final String id ) throws IOException, InterruptedException {
+        if ( realign ) {
+            realignWithMafft();
+        }
+        if ( !verbose ) {
+            // Quiet mode: the outfile is still written, nothing is printed.
+            writeOutfile();
+            return;
+        }
+        printMsaStats( id );
+        final StringBuilder message = new StringBuilder( "-> " );
+        message.append( writeOutfile() );
+        if ( realign ) {
+            message.append( "\t(realigned)" );
+        }
+        System.out.print( message.toString() );
+    }
+
final private void realignWithMafft() throws IOException, InterruptedException {
// final MsaInferrer mafft = Mafft
// .createInstance( "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft" );
//opts.add( "1000" );
//opts.add( "--localpair" );
//opts.add( "--quiet" );
- _msa = mafft.infer( _msa.asSequenceList(), opts );
+ _msa = new DeleteableMsa( ( BasicMsa ) mafft.infer( _msa.asSequenceList(), opts ) );
}
+ // Delegates to the static MsaMethods.removeGapColumns( 1, _msa ). Unlike the
+ // previous version, the result is not assigned back to _msa; presumably the
+ // helper modifies the DeleteableMsa in place -- TODO confirm in MsaMethods.
+ // NOTE(review): the meaning of the first argument (1) is not visible here.
final private void removeGapColumns() {
- _msa = MsaMethods.createInstance().removeGapColumns( 1, 0, _msa );
+ //~ _msa = MsaMethods.createInstance().removeGapColumns( 1, 0, _msa );
+ MsaMethods.removeGapColumns( 1, _msa );
}
final private void removeViaGapAverage( final double mean_gapiness,
int i = 0;
while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
final String id = to_remove_ids.get( i );
- _msa = MsaMethods.removeSequence( _msa, id );
+ //~_msa = MsaMethods.removeSequence( _msa, id );
+ _msa.deleteRow( id );
removeGapColumns();
- if ( verbose ) {
- printMsaStats( id );
- }
if ( ( ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 ) )
|| ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
- if ( realign ) {
- realignWithMafft();
- }
- final String s = writeOutfile();
- if ( verbose ) {
- System.out.print( "-> " + s );
- }
+ printMsaStatsWriteOutfileAndRealign( realign, verbose, id );
}
- if ( verbose ) {
- System.out.println();
- }
- ++i;
- }
- }
-
- final private List<MsaProperties> chart( final boolean realign, final boolean norm, final boolean verbose )
- throws IOException, InterruptedException {
- final GapContribution stats[] = calcGapContribtionsStats( norm );
- final List<String> to_remove_ids = new ArrayList<String>();
- final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
- for( final GapContribution gap_gontribution : stats ) {
- to_remove_ids.add( gap_gontribution.getId() );
- }
- if ( verbose ) {
- printTableHeader();
- }
- int i = 0;
- final int x = ForesterUtil.roundToInt( _msa.getNumberOfSequences() / 20.0 );
- while ( _msa.getNumberOfSequences() > x ) {
- final String id = to_remove_ids.get( i );
- _msa = MsaMethods.removeSequence( _msa, id );
- removeGapColumns();
- msa_props.add( new MsaProperties( _msa ) );
- if ( verbose ) {
+ else if ( verbose ) {
printMsaStats( id );
}
- if ( realign ) {
- realignWithMafft();
- }
if ( verbose ) {
System.out.println();
}
++i;
}
- return msa_props;
}
final private void removeViaLength( final int length,
int i = 0;
while ( _msa.getLength() > length ) {
final String id = to_remove_ids.get( i );
- _msa = MsaMethods.removeSequence( _msa, id );
+ //~_msa = MsaMethods.removeSequence( _msa, id );
+ _msa.deleteRow( id );
removeGapColumns();
- if ( verbose ) {
- printMsaStats( id );
- }
if ( ( ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 ) ) || ( _msa.getLength() <= length ) ) {
- if ( realign ) {
- realignWithMafft();
- }
- final String s = writeOutfile();
- if ( verbose ) {
- System.out.print( "-> " + s );
- }
+ printMsaStatsWriteOutfileAndRealign( realign, verbose, id );
+ }
+ else if ( verbose ) {
+ printMsaStats( id );
}
if ( verbose ) {
System.out.println();
}
for( int i = 0; i < to_remove_ids.size(); ++i ) {
final String id = to_remove_ids.get( i );
- _msa = MsaMethods.removeSequence( _msa, id );
+ //~ _msa = MsaMethods.removeSequence( _msa, id );
+ _msa.deleteRow( id );
removeGapColumns();
- if ( verbose ) {
- printMsaStats( id );
- }
if ( ( ( step > 0 ) && ( ( ( i + 1 ) % step ) == 0 ) ) || ( i == ( to_remove_ids.size() - 1 ) ) ) {
- if ( realign ) {
- realignWithMafft();
- }
- final String s = writeOutfile();
- if ( verbose ) {
- System.out.print( "-> " + s );
- }
+ printMsaStatsWriteOutfileAndRealign( realign, verbose, id );
+ }
+ else if ( verbose ) {
+ printMsaStats( id );
}
if ( verbose ) {
System.out.println();
return s;
}
+    /**
+     * Runs the charting procedure on the given alignment and displays the result.
+     * The sequence count is read before any processing, a compactor is created
+     * for msa, the MAFFT path is set only when realignment is requested, and the
+     * collected properties are handed to Chart.display together with the initial
+     * sequence count. The instance-level chart is always run in verbose mode.
+     *
+     * @param msa           alignment to process; it is handed to the compactor as is
+     * @param step          interval passed through to the instance-level chart
+     * @param realign       whether to realign with MAFFT
+     * @param norm          passed through to the instance-level chart
+     * @param path_to_mafft MAFFT location; only used when realign is true
+     * @return the compactor that performed the run
+     */
+    public final static MsaCompactor chart( final DeleteableMsa msa,
+                                            final int step,
+                                            final boolean realign,
+                                            final boolean norm,
+                                            final String path_to_mafft ) throws IOException, InterruptedException {
+        // Must be captured first: the run below deletes rows from msa.
+        final int seqs_at_start = msa.getNumberOfSequences();
+        final MsaCompactor compactor = new MsaCompactor( msa );
+        if ( realign ) {
+            compactor.setPathToMafft( path_to_mafft );
+        }
+        Chart.display( compactor.chart( step, realign, norm, true ), seqs_at_start );
+        return compactor;
+    }
+
// Returns null if not path found.
final public static String guessPathToMafft() {
String path;
return null;
}
- public final static MsaCompactor reduceGapAverage( final Msa msa,
+ public final static MsaCompactor reduceGapAverage( final DeleteableMsa msa,
final double max_gap_average,
final int step,
final boolean realign,
return mc;
}
- public final static MsaCompactor reduceLength( final Msa msa,
+ public final static MsaCompactor reduceLength( final DeleteableMsa msa,
final int length,
final int step,
final boolean realign,
return mc;
}
- public final static MsaCompactor chart( final Msa msa,
- final boolean realign,
- final boolean norm,
- final String path_to_mafft ) throws IOException, InterruptedException {
- final MsaCompactor mc = new MsaCompactor( msa );
- if ( realign ) {
- mc.setPathToMafft( path_to_mafft );
- }
- final List<MsaProperties> msa_props = mc.chart( realign, norm, true );
- Chart.display( msa_props );
- return mc;
- }
-
- public final static MsaCompactor removeWorstOffenders( final Msa msa,
+ public final static MsaCompactor removeWorstOffenders( final DeleteableMsa msa,
final int worst_offenders_to_remove,
final int step,
final boolean realign,