From: cmzmasek@gmail.com Date: Thu, 17 Apr 2014 17:11:52 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=36de46cc85f62604f71a8b788d2aabdfe6b80935;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index fd035fe..7947bdd 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -64,9 +64,17 @@ public class msa_compactor { worst_remove = cla.getOptionValueAsInt( REMOVE_WORST_OFFENDERS_OPTION ); } if ( cla.isOptionSet( AV_GAPINESS_OPTION ) ) { + if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) ) { + printHelp(); + System.exit( 0 ); + } av = cla.getOptionValueAsDouble( AV_GAPINESS_OPTION ); } if ( cla.isOptionSet( LENGTH_OPTION ) ) { + if ( cla.isOptionSet( REMOVE_WORST_OFFENDERS_OPTION ) || cla.isOptionSet( AV_GAPINESS_OPTION ) ) { + printHelp(); + System.exit( 0 ); + } length = cla.getOptionValueAsInt( LENGTH_OPTION ); } if ( cla.isOptionSet( STEP_OPTION ) ) { @@ -98,12 +106,11 @@ public class msa_compactor { else { msa = GeneralMsaParser.parse( is ); } - MsaCompactor mc = null; if ( worst_remove > 0 ) { - mc = MsaCompactor.removeWorstOffenders( msa, worst_remove, step, realign, norm, path_to_mafft, out ); + MsaCompactor.removeWorstOffenders( msa, worst_remove, step, realign, norm, path_to_mafft, out ); } else if ( av > 0 ) { - mc = MsaCompactor.reduceGapAverage( msa, av, step, realign, norm, path_to_mafft, out ); + MsaCompactor.reduceGapAverage( msa, av, step, realign, norm, path_to_mafft, out ); } else if ( length > 0 ) { if ( length >= msa.getLength() ) { @@ -111,7 +118,7 @@ public class msa_compactor { + ") is greater than or equal to MSA original length (" + msa.getLength() + ")" ); } // TODO if < shortest seq -> error - mc = MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out ); + MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out ); } } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index 3092060..88deac8 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -35,12 +35,13 @@ import org.forester.util.ForesterUtil; public class MsaCompactor { - final private static NumberFormat NF_3 = new DecimalFormat( "#.###" ); - final private static NumberFormat NF_4 = new DecimalFormat( "#.####" ); + final private static NumberFormat NF_3 = new DecimalFormat( "#.###" ); + final private static NumberFormat NF_4 = new DecimalFormat( "#.####" ); private Msa _msa; private File _out_file_base; private String _path_to_mafft; private final SortedSet _removed_seq_ids; + private final String _maffts_opts = "--retree 1"; static { NF_4.setRoundingMode( RoundingMode.HALF_UP ); NF_3.setRoundingMode( RoundingMode.HALF_UP ); @@ -158,7 +159,6 @@ public class MsaCompactor { m.write( ForesterUtil.createBufferedWriter( matrix_name ) ); } catch ( final IOException e ) { - // TODO Auto-generated catch block e.printStackTrace(); } } @@ -173,9 +173,9 @@ public class MsaCompactor { sb.append( "\t" ); sb.append( _msa.getLength() ); sb.append( "\t" ); - sb.append( NF_3.format( MsaMethods.calcGapRatio( _msa ) ) ); + sb.append( NF_4.format( MsaMethods.calcGapRatio( _msa ) ) ); sb.append( "\t" ); - sb.append( NF_3.format( calculateIdentityRatio( 0, _msa.getLength() - 1, _msa ).arithmeticMean() ) ); + sb.append( NF_4.format( calculateIdentityRatio( 0, _msa.getLength() - 1, _msa ).arithmeticMean() ) ); return sb; } @@ -184,10 +184,13 @@ public class MsaCompactor { // .createInstance( "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft" ); final MsaInferrer mafft = Mafft.createInstance( _path_to_mafft ); final List opts = new ArrayList(); - opts.add( "--maxiterate" ); - opts.add( "1000" ); - opts.add( "--localpair" ); - opts.add( "--quiet" ); + for( final String o : _maffts_opts.split( "\\s" ) ) { + opts.add( o ); + } + //opts.add( "--maxiterate" ); + //opts.add( "1000" ); + //opts.add( "--localpair" ); + //opts.add( "--quiet" ); _msa = mafft.infer( _msa.asSequenceList(), opts ); } @@ -205,6 +208,9 @@ public class MsaCompactor { for( final GapContribution gap_gontribution : stats ) { to_remove_ids.add( gap_gontribution.getId() ); } + if ( verbose ) { + printTableHeader(); + } int i = 0; while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) { final String id = to_remove_ids.get( i ); @@ -233,6 +239,20 @@ public class MsaCompactor { } } + private final static void printTableHeader() { + System.out.print( ForesterUtil.pad( "Id", 20, ' ', false ) ); + System.out.print( "\t" ); + System.out.print( "Seqs" ); + System.out.print( "\t" ); + System.out.print( "Length" ); + System.out.print( "\t" ); + System.out.print( "Gaps" ); + System.out.print( "\t" ); + System.out.print( "MSA qual" ); + System.out.print( "\t" ); + System.out.println(); + } + final private void removeViaLength( final int length, final int step, final boolean realign, @@ -243,6 +263,9 @@ public class MsaCompactor { for( final GapContribution gap_gontribution : stats ) { to_remove_ids.add( gap_gontribution.getId() ); } + if ( verbose ) { + printTableHeader(); + } int i = 0; while ( _msa.getLength() > length ) { final String id = to_remove_ids.get( i ); @@ -282,6 +305,9 @@ public class MsaCompactor { to_remove_ids.add( stats[ j ].getId() ); _removed_seq_ids.add( stats[ j ].getId() ); } + if ( verbose ) { + printTableHeader(); + } for( int i = 0; i < to_remove_ids.size(); ++i ) { final String id = to_remove_ids.get( i ); _msa = MsaMethods.removeSequence( _msa, id ); @@ -333,6 +359,10 @@ public class MsaCompactor { return path; } } + path = "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft"; + if ( MsaInferrer.isInstalled( path ) ) { + return path; + } path = "/usr/local/bin/mafft"; if ( MsaInferrer.isInstalled( path ) ) { return path;