+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2014 Christian M. Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.application;
public static void main( final String args[] ) {
try {
final CommandLineArguments cla = new CommandLineArguments( args );
- if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( cla.getNumberOfNames() != 2 ) ) {
+ if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 )
+ || ( ( cla.getNumberOfNames() < 1 ) || ( cla.getNumberOfNames() > 2 ) ) ) {
printHelp();
System.exit( 0 );
}
final File in = cla.getFile( 0 );
- final File out = cla.getFile( 1 );
+ File out = null;
+ if ( cla.getNumberOfNames() > 1 ) {
+ out = cla.getFile( 1 );
+ }
int worst_remove = -1;
double av_gap = -1;
int length = -1;
// TODO if < shortest seq -> error
MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
}
+ else {
+ MsaCompactor.chart( msa, realign, norm, path_to_mafft );
+ }
}
catch ( final Exception e ) {
e.printStackTrace();
return BasicMsa.createInstance( seqs );
}
+ public static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) {
+ final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+ for( int c = from; c <= to; ++c ) {
+ stats.addValue( calculateIdentityRatio( msa, c ) );
+ }
+ return stats;
+ }
+
public static double calculateIdentityRatio( final Msa msa, final int column ) {
final SortedMap<Character, Integer> dist = calculateResidueDestributionPerColumn( msa, column );
int majority_count = 0;
--- /dev/null
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2014 Christian M. Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+
+package org.forester.msa_compactor;
+
+import java.awt.BorderLayout;
+import java.awt.event.ActionListener;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.swing.JDialog;
+import javax.swing.JMenu;
+import javax.swing.JMenuBar;
+import javax.swing.JMenuItem;
+import javax.swing.JPanel;
+import javax.swing.UIManager;
+import javax.swing.WindowConstants;
+
+import com.approximatrix.charting.coordsystem.BoxCoordSystem;
+import com.approximatrix.charting.model.MultiScatterDataModel;
+import com.approximatrix.charting.render.MultiScatterChartRenderer;
+import com.approximatrix.charting.swing.ChartPanel;
+
+public final class Chart extends JDialog implements ActionListener {
+
+ private static final long serialVersionUID = -5292420246132943515L;
+ ChartPanel _chart_panel = null;
+ final JMenuItem _m_exit = new JMenuItem();
+ List<MsaProperties> _msa_props;
+
+ Chart( final List<MsaProperties> msa_props ) {
+ super();
+ _msa_props = msa_props;
+ setTitle( "msa compactor" );
+ setSize( 500, 400 );
+ setResizable( true );
+ final JPanel content_pane = new JPanel();
+ content_pane.setLayout( new BorderLayout() );
+ setContentPane( content_pane );
+ final JMenuBar menu_bar = new JMenuBar();
+ final JMenu file_menu = new JMenu();
+ file_menu.setText( "File" );
+ _m_exit.setText( "Exit" );
+ file_menu.add( _m_exit );
+ menu_bar.add( file_menu );
+ setJMenuBar( menu_bar );
+ setDefaultCloseOperation( WindowConstants.DISPOSE_ON_CLOSE );
+ _m_exit.addActionListener( this );
+ content_pane.add( obtainChartPanel(), BorderLayout.CENTER );
+ }
+
+ @Override
+ public void actionPerformed( final java.awt.event.ActionEvent e ) {
+ if ( e.getSource() == _m_exit ) {
+ dispose();
+ }
+ }
+
+ private ChartPanel obtainChartPanel() {
+ if ( _chart_panel == null ) {
+ final MultiScatterDataModel model = new MultiScatterDataModel();
+ if ( ( _msa_props == null ) || _msa_props.isEmpty() ) {
+ _msa_props = new ArrayList<MsaProperties>();
+ final MsaProperties p0 = new MsaProperties( 10, 200, 0.5, 0.1 );
+ final MsaProperties p1 = new MsaProperties( 9, 190, 0.49, 0.2 );
+ final MsaProperties p2 = new MsaProperties( 8, 150, 0.2, 0.3 );
+ final MsaProperties p3 = new MsaProperties( 7, 145, 0.2, 0.4 );
+ _msa_props.add( p0 );
+ _msa_props.add( p1 );
+ _msa_props.add( p2 );
+ _msa_props.add( p3 );
+ }
+ final double[][] seqs_length = new double[ _msa_props.size() ][ 2 ];
+ for( int i = 0; i < _msa_props.size(); ++i ) {
+ seqs_length[ i ][ 0 ] = _msa_props.get( i ).getNumberOfSequences();
+ seqs_length[ i ][ 1 ] = _msa_props.get( i ).getLength();
+ }
+ model.addData( seqs_length, "Length" );
+ model.setSeriesLine( "Series " + "Length", true );
+ model.setSeriesMarker( "Series " + "Length", true );
+ // final double[][] seqs_gaps = new double[ _msa_props.size() ][ 2 ];
+ // for( int i = 0; i < _msa_props.size(); ++i ) {
+ // seqs_gaps[ i ][ 0 ] = _msa_props.get( i ).getNumberOfSequences();
+ // seqs_gaps[ i ][ 1 ] = _msa_props.get( i ).getGapRatio();
+ // }
+ // model.addData( seqs_gaps, "Gaps" );
+ // model.setSeriesLine( "Series " + "Gaps", true );
+ // model.setSeriesMarker( "Series " + "Gaps", true );
+ // final double[][] seqs_identity = new double[ _msa_props.size() ][ 2 ];
+ // for( int i = 0; i < _msa_props.size(); ++i ) {
+ // seqs_identity[ i ][ 0 ] = _msa_props.get( i ).getNumberOfSequences();
+ // seqs_identity[ i ][ 1 ] = _msa_props.get( i ).getAverageIdentityRatio();
+ // }
+ // model.addData( seqs_identity, "Id" );
+ // model.setSeriesLine( "Series " + "Id", false );
+ // model.setSeriesMarker( "Series " + "Id", true );
+ final BoxCoordSystem coord = new BoxCoordSystem( model );
+ coord.setUnitFont( coord.getUnitFont().deriveFont( 20.0f ) );
+ coord.setXAxisUnit( "Number of Sequences" );
+ coord.setPaintGrid( true );
+ coord.setYAxisUnit( "MSA Length" );
+ _chart_panel = new ChartPanel( model, "msa compactor" );
+ _chart_panel.setCoordSystem( coord );
+ final MultiScatterChartRenderer renderer = new MultiScatterChartRenderer( coord, model );
+ renderer.setAllowBuffer( false );
+ _chart_panel.addChartRenderer( renderer, 0 );
+ }
+ return _chart_panel;
+ }
+
+ public static void display( final List<MsaProperties> msa_props ) {
+ try {
+ UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() );
+ }
+ catch ( final Exception e ) {
+ e.printStackTrace();
+ }
+ final Chart chart = new Chart( msa_props );
+ chart.setVisible( true );
+ }
+
+ public static void main( final String[] args ) {
+ try {
+ UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() );
+ }
+ catch ( final Exception e ) {
+ e.printStackTrace();
+ }
+ final Chart temp = new Chart( null );
+ temp.setVisible( true );
+ }
+}
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2014 Christian M. Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.msa_compactor;
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2014 Christian M. Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.msa_compactor;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.sequence.Sequence;
import org.forester.tools.ConfidenceAssessor;
-import org.forester.util.BasicDescriptiveStatistics;
-import org.forester.util.DescriptiveStatistics;
import org.forester.util.ForesterUtil;
public class MsaCompactor {
sb.append( "\t" );
sb.append( NF_4.format( MsaMethods.calcGapRatio( _msa ) ) );
sb.append( "\t" );
- sb.append( NF_4.format( calculateIdentityRatio( 0, _msa.getLength() - 1, _msa ).arithmeticMean() ) );
+ sb.append( NF_4.format( MsaMethods.calculateIdentityRatio( 0, _msa.getLength() - 1, _msa ).arithmeticMean() ) );
return sb;
}
}
}
+    /**
+     * Removes sequences one at a time, in the order returned by
+     * {@code calcGapContribtionsStats( norm )}, and records the properties
+     * of the alignment after each removal.
+     * <p>
+     * Removal stops once the number of sequences has dropped to roughly 5%
+     * of the initial number, but never below one sequence, so the alignment
+     * is never emptied (for fewer than 10 sequences the 5% threshold rounds
+     * to zero).
+     *
+     * @param realign if true, {@code realignWithMafft()} is called after
+     *            each removal
+     * @param norm passed on to {@code calcGapContribtionsStats}
+     * @param verbose if true, a table header and one statistics line per
+     *            removed sequence are printed
+     * @return one {@link MsaProperties} per removal, in removal order
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    final private List<MsaProperties> chart( final boolean realign, final boolean norm, final boolean verbose )
+            throws IOException, InterruptedException {
+        final GapContribution stats[] = calcGapContribtionsStats( norm );
+        // Fix the removal order up front, before _msa starts changing.
+        final List<String> to_remove_ids = new ArrayList<String>( stats.length );
+        for( final GapContribution gap_contribution : stats ) {
+            to_remove_ids.add( gap_contribution.getId() );
+        }
+        final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
+        if ( verbose ) {
+            printTableHeader();
+        }
+        // Stop at ~5% of the initial sequences, but keep at least one sequence.
+        final int min_seqs = Math.max( 1, ForesterUtil.roundToInt( _msa.getNumberOfSequences() / 20.0 ) );
+        int i = 0;
+        while ( ( _msa.getNumberOfSequences() > min_seqs ) && ( i < to_remove_ids.size() ) ) {
+            final String id = to_remove_ids.get( i );
+            _msa = MsaMethods.removeSequence( _msa, id );
+            removeGapColumns();
+            // NOTE(review): properties are recorded before the optional
+            // realignment below -- confirm this is intended.
+            msa_props.add( new MsaProperties( _msa ) );
+            if ( verbose ) {
+                printMsaStats( id );
+            }
+            if ( realign ) {
+                realignWithMafft();
+            }
+            if ( verbose ) {
+                System.out.println();
+            }
+            ++i;
+        }
+        return msa_props;
+    }
+
final private void removeViaLength( final int length,
final int step,
final boolean realign,
return mc;
}
+    /**
+     * Runs the charting analysis on an alignment and displays the result.
+     * <p>
+     * A compactor is created for {@code msa}; when {@code realign} is set,
+     * the MAFFT path is configured on it first. The per-removal alignment
+     * properties are then collected (with verbose output enabled) and
+     * handed to {@link Chart#display(List)}.
+     *
+     * @param msa the alignment to analyze
+     * @param realign whether to realign after each removal
+     * @param norm passed through to the analysis
+     * @param path_to_mafft path to the MAFFT program; only used when
+     *            {@code realign} is true
+     * @return the compactor that performed the analysis
+     * @throws IOException
+     * @throws InterruptedException
+     */
+    public final static MsaCompactor chart( final Msa msa,
+                                            final boolean realign,
+                                            final boolean norm,
+                                            final String path_to_mafft ) throws IOException, InterruptedException {
+        final MsaCompactor compactor = new MsaCompactor( msa );
+        if ( realign ) {
+            compactor.setPathToMafft( path_to_mafft );
+        }
+        Chart.display( compactor.chart( realign, norm, true ) );
+        return compactor;
+    }
+
public final static MsaCompactor removeWorstOffenders( final Msa msa,
final int worst_offenders_to_remove,
final int step,
return mc;
}
- private static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) {
- final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
- for( int c = from; c <= to; ++c ) {
- stats.addValue( MsaMethods.calculateIdentityRatio( msa, c ) );
- }
- return stats;
- }
-
private final static void printTableHeader() {
System.out.print( ForesterUtil.pad( "Id", 20, ' ', false ) );
System.out.print( "\t" );
--- /dev/null
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2014 Christian M. Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+
+package org.forester.msa_compactor;
+
+import org.forester.msa.Msa;
+import org.forester.msa.MsaMethods;
+
+/**
+ * Immutable holder for four summary values of a multiple sequence
+ * alignment: number of sequences, length, gap ratio, and average identity
+ * ratio.
+ */
+public final class MsaProperties {
+
+    final private int    _number_of_sequences;
+    final private int    _length;
+    final private double _gap_ratio;
+    final private double _average_identity_ratio;
+
+    /**
+     * Derives all four values from an alignment: the gap ratio via
+     * {@code MsaMethods.calcGapRatio}, and the average identity ratio as the
+     * arithmetic mean of the identity ratios over all columns.
+     *
+     * @param msa the alignment to summarize
+     */
+    public MsaProperties( final Msa msa ) {
+        this( msa.getNumberOfSequences(),
+              msa.getLength(),
+              MsaMethods.calcGapRatio( msa ),
+              MsaMethods.calculateIdentityRatio( 0, msa.getLength() - 1, msa ).arithmeticMean() );
+    }
+
+    /**
+     * Stores the given values as-is.
+     *
+     * @param number_of_sequences number of sequences
+     * @param length alignment length
+     * @param gap_ratio gap ratio
+     * @param average_identity_ratio average identity ratio
+     */
+    public MsaProperties( final int number_of_sequences,
+                          final int length,
+                          final double gap_ratio,
+                          final double average_identity_ratio ) {
+        _number_of_sequences = number_of_sequences;
+        _length = length;
+        _gap_ratio = gap_ratio;
+        _average_identity_ratio = average_identity_ratio;
+    }
+
+    /** @return the number of sequences */
+    public final int getNumberOfSequences() {
+        return _number_of_sequences;
+    }
+
+    /** @return the alignment length */
+    public final int getLength() {
+        return _length;
+    }
+
+    /** @return the gap ratio */
+    public final double getGapRatio() {
+        return _gap_ratio;
+    }
+
+    /** @return the average identity ratio */
+    public final double getAverageIdentityRatio() {
+        return _average_identity_ratio;
+    }
+}
// FORESTER -- software libraries and applications
// for evolutionary biology research and applications.
//
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2014 Christian M. Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
// All rights reserved
//
// This library is free software; you can redistribute it and/or
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
-// Contact: phylosoft @ gmail . com
// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.test;