From 93f740ca4fbd156ba93b07b8e43112f392645c1d Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 18 Apr 2014 23:04:53 +0000 Subject: [PATCH] --- .../org/forester/application/msa_compactor.java | 34 ++++- forester/java/src/org/forester/msa/MsaMethods.java | 8 + .../java/src/org/forester/msa_compactor/Chart.java | 153 ++++++++++++++++++++ .../forester/msa_compactor/GapContribution.java | 23 +++ .../org/forester/msa_compactor/MsaCompactor.java | 80 ++++++++-- .../org/forester/msa_compactor/MsaProperties.java | 69 +++++++++ forester/java/src/org/forester/test/Test.java | 5 +- 7 files changed, 356 insertions(+), 16 deletions(-) create mode 100644 forester/java/src/org/forester/msa_compactor/Chart.java create mode 100644 forester/java/src/org/forester/msa_compactor/MsaProperties.java diff --git a/forester/java/src/org/forester/application/msa_compactor.java b/forester/java/src/org/forester/application/msa_compactor.java index 1cd94d6..45f66c1 100644 --- a/forester/java/src/org/forester/application/msa_compactor.java +++ b/forester/java/src/org/forester/application/msa_compactor.java @@ -1,3 +1,26 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2014 Christian M. Zmasek +// Copyright (C) 2014 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.application;
 
@@ -35,12 +58,16 @@ public class msa_compactor {
     public static void main( final String args[] ) {
         try {
             final CommandLineArguments cla = new CommandLineArguments( args );
-            if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) || ( cla.getNumberOfNames() != 2 ) ) {
+            if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 )
+                    || ( ( cla.getNumberOfNames() < 1 ) || ( cla.getNumberOfNames() > 2 ) ) ) {
                 printHelp();
                 System.exit( 0 );
             }
             final File in = cla.getFile( 0 );
-            final File out = cla.getFile( 1 );
+            File out = null;
+            if ( cla.getNumberOfNames() > 1 ) {
+                out = cla.getFile( 1 );
+            }
             int worst_remove = -1;
             double av_gap = -1;
             int length = -1;
@@ -130,6 +157,9 @@ public class msa_compactor {
                 // TODO if < shortest seq -> error
                 MsaCompactor.reduceLength( msa, length, step, realign, norm, path_to_mafft, out );
             }
+            else {
+                MsaCompactor.chart( msa, realign, norm, path_to_mafft );
+            }
         }
         catch ( final Exception e ) {
             e.printStackTrace();
diff --git a/forester/java/src/org/forester/msa/MsaMethods.java b/forester/java/src/org/forester/msa/MsaMethods.java
index ac6cb1f..45c9697 100644
--- a/forester/java/src/org/forester/msa/MsaMethods.java
+++ b/forester/java/src/org/forester/msa/MsaMethods.java
@@ -159,6 +159,14 @@ public final class MsaMethods {
         return BasicMsa.createInstance( seqs );
     }
 
+    public static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) {
+        final DescriptiveStatistics column_ratios = new BasicDescriptiveStatistics();
+        for( int column = from; column <= to; ++column ) { // both bounds are inclusive
+            column_ratios.addValue( calculateIdentityRatio( msa, column ) );
+        }
+        return column_ratios;
+    }
+
     public static double
calculateIdentityRatio( final Msa msa, final int column ) { final SortedMap dist = calculateResidueDestributionPerColumn( msa, column ); int majority_count = 0; diff --git a/forester/java/src/org/forester/msa_compactor/Chart.java b/forester/java/src/org/forester/msa_compactor/Chart.java new file mode 100644 index 0000000..75f3a04 --- /dev/null +++ b/forester/java/src/org/forester/msa_compactor/Chart.java @@ -0,0 +1,153 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2014 Christian M. Zmasek +// Copyright (C) 2014 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+
+package org.forester.msa_compactor;
+
+import java.awt.BorderLayout;
+import java.awt.event.ActionListener;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.swing.JDialog;
+import javax.swing.JMenu;
+import javax.swing.JMenuBar;
+import javax.swing.JMenuItem;
+import javax.swing.JPanel;
+import javax.swing.UIManager;
+import javax.swing.WindowConstants;
+
+import com.approximatrix.charting.coordsystem.BoxCoordSystem;
+import com.approximatrix.charting.model.MultiScatterDataModel;
+import com.approximatrix.charting.render.MultiScatterChartRenderer;
+import com.approximatrix.charting.swing.ChartPanel;
+
+/**
+ * Dialog that plots MSA length against number of sequences for a list of MsaProperties.
+ */
+public final class Chart extends JDialog implements ActionListener {
+
+    private static final long serialVersionUID = -5292420246132943515L;
+    ChartPanel _chart_panel = null;
+    final JMenuItem _m_exit = new JMenuItem();
+    List<MsaProperties> _msa_props;
+
+    /** Sets up the dialog with a File > Exit menu and the chart panel built from msa_props. */
+    Chart( final List<MsaProperties> msa_props ) {
+        super();
+        _msa_props = msa_props;
+        setTitle( "msa compactor" );
+        setSize( 500, 400 );
+        setResizable( true );
+        final JPanel content_pane = new JPanel();
+        content_pane.setLayout( new BorderLayout() );
+        setContentPane( content_pane );
+        final JMenuBar menu_bar = new JMenuBar();
+        final JMenu file_menu = new JMenu();
+        file_menu.setText( "File" );
+        _m_exit.setText( "Exit" );
+        file_menu.add( _m_exit );
+        menu_bar.add( file_menu );
+        setJMenuBar( menu_bar );
+        setDefaultCloseOperation( WindowConstants.DISPOSE_ON_CLOSE );
+        _m_exit.addActionListener( this );
+        content_pane.add( obtainChartPanel(), BorderLayout.CENTER );
+    }
+
+    @Override
+    public void actionPerformed( final java.awt.event.ActionEvent e ) {
+        if ( e.getSource() == _m_exit ) {
+            dispose();
+        }
+    }
+
+    /** Lazily creates the chart panel; uses four hard-coded sample points when _msa_props is null or empty. */
+    private ChartPanel obtainChartPanel() {
+        if ( _chart_panel == null ) {
+            final MultiScatterDataModel model = new MultiScatterDataModel();
+            if ( ( _msa_props == null ) || _msa_props.isEmpty() ) {
+                _msa_props = new ArrayList<MsaProperties>();
+                final MsaProperties p0 = new MsaProperties( 10, 200, 0.5, 0.1 );
+                final MsaProperties p1 = new MsaProperties( 9, 190, 0.49, 0.2 );
+                final MsaProperties p2 = new MsaProperties( 8, 150, 0.2, 0.3 );
+                final MsaProperties p3 = new MsaProperties( 7, 145, 0.2, 0.4 );
+                _msa_props.add( p0 );
+                _msa_props.add( p1 );
+                _msa_props.add( p2 );
+                _msa_props.add( p3 );
+            }
+            final double[][] seqs_length = new double[ _msa_props.size() ][ 2 ];
+            for( int i = 0; i < _msa_props.size(); ++i ) {
+                seqs_length[ i ][ 0 ] = _msa_props.get( i ).getNumberOfSequences();
+                seqs_length[ i ][ 1 ] = _msa_props.get( i ).getLength();
+            }
+            model.addData( seqs_length, "Length" );
+            model.setSeriesLine( "Series " + "Length", true );
+            model.setSeriesMarker( "Series " + "Length", true );
+            // final double[][] seqs_gaps = new double[ _msa_props.size() ][ 2 ];
+            // for( int i = 0; i < _msa_props.size(); ++i ) {
+            // seqs_gaps[ i ][ 0 ] = _msa_props.get( i ).getNumberOfSequences();
+            // seqs_gaps[ i ][ 1 ] = _msa_props.get( i ).getGapRatio();
+            // }
+            // model.addData( seqs_gaps, "Gaps" );
+            // model.setSeriesLine( "Series " + "Gaps", true );
+            // model.setSeriesMarker( "Series " + "Gaps", true );
+            // final double[][] seqs_identity = new double[ _msa_props.size() ][ 2 ];
+            // for( int i = 0; i < _msa_props.size(); ++i ) {
+            // seqs_identity[ i ][ 0 ] = _msa_props.get( i ).getNumberOfSequences();
+            // seqs_identity[ i ][ 1 ] = _msa_props.get( i ).getAverageIdentityRatio();
+            // }
+            // model.addData( seqs_identity, "Id" );
+            // model.setSeriesLine( "Series " + "Id", false );
+            // model.setSeriesMarker( "Series " + "Id", true );
+            final BoxCoordSystem coord = new BoxCoordSystem( model );
+            coord.setUnitFont( coord.getUnitFont().deriveFont( 20.0f ) );
+            coord.setXAxisUnit( "Number of Sequences" );
+            coord.setPaintGrid( true );
+            coord.setYAxisUnit( "MSA Length" );
+            _chart_panel = new ChartPanel( model, "msa compactor" );
+            _chart_panel.setCoordSystem( coord );
+            final MultiScatterChartRenderer renderer = new MultiScatterChartRenderer( coord, model );
+            renderer.setAllowBuffer( false );
+            _chart_panel.addChartRenderer( renderer, 0 );
+        }
+        return _chart_panel;
+    }
+
+    /** Switches to the system look and feel (a failure is only printed) and shows a new Chart for msa_props. */
+    public static void display( final List<MsaProperties> msa_props ) {
+        try {
+            UIManager.setLookAndFeel( UIManager.getSystemLookAndFeelClassName() );
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace();
+        }
+        final Chart chart = new Chart( msa_props );
+        chart.setVisible( true );
+    }
+
+    /** Demo entry point: shows the chart with the built-in sample data. */
+    public static void main( final String[] args ) {
+        display( null );
+    }
+}
diff --git a/forester/java/src/org/forester/msa_compactor/GapContribution.java b/forester/java/src/org/forester/msa_compactor/GapContribution.java
index b7b3403..355b697 100644
--- a/forester/java/src/org/forester/msa_compactor/GapContribution.java
+++ b/forester/java/src/org/forester/msa_compactor/GapContribution.java
@@ -1,3 +1,26 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2014 Christian M. Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.msa_compactor; diff --git a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java index 47e1b9c..4eff35e 100644 --- a/forester/java/src/org/forester/msa_compactor/MsaCompactor.java +++ b/forester/java/src/org/forester/msa_compactor/MsaCompactor.java @@ -1,3 +1,26 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2014 Christian M. Zmasek +// Copyright (C) 2014 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.msa_compactor;
 
@@ -29,8 +52,6 @@ import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.sequence.Sequence;
 import org.forester.tools.ConfidenceAssessor;
-import org.forester.util.BasicDescriptiveStatistics;
-import org.forester.util.DescriptiveStatistics;
 import org.forester.util.ForesterUtil;
 
 public class MsaCompactor {
@@ -175,7 +196,7 @@ public class MsaCompactor {
         sb.append( "\t" );
         sb.append( NF_4.format( MsaMethods.calcGapRatio( _msa ) ) );
         sb.append( "\t" );
-        sb.append( NF_4.format( calculateIdentityRatio( 0, _msa.getLength() - 1, _msa ).arithmeticMean() ) );
+        sb.append( NF_4.format( MsaMethods.calculateIdentityRatio( 0, _msa.getLength() - 1, _msa ).arithmeticMean() ) );
         return sb;
     }
 
@@ -244,6 +265,38 @@ public class MsaCompactor {
         }
     }
 
+    /** Drops sequences in calcGapContribtionsStats order until at most x remain, collecting each step's MsaProperties. */
+    final private List<MsaProperties> chart( final boolean realign, final boolean norm, final boolean verbose )
+            throws IOException, InterruptedException {
+        final GapContribution stats[] = calcGapContribtionsStats( norm );
+        final List<String> to_remove_ids = new ArrayList<String>();
+        final List<MsaProperties> msa_props = new ArrayList<MsaProperties>();
+        for( final GapContribution gap_contribution : stats ) {
+            to_remove_ids.add( gap_contribution.getId() );
+        }
+        if ( verbose ) {
+            printTableHeader();
+        }
+        // NOTE(review): x is fixed from the initial sequence count; if it rounds to 0 every sequence is removed -- confirm.
+        final int x = ForesterUtil.roundToInt( _msa.getNumberOfSequences() / 20.0 );
+        for( int i = 0; _msa.getNumberOfSequences() > x; ++i ) {
+            final String id = to_remove_ids.get( i );
+            _msa = MsaMethods.removeSequence( _msa, id );
+            removeGapColumns();
+            msa_props.add( new MsaProperties( _msa ) );
+            if ( verbose ) {
+                printMsaStats( id );
+            }
+            if ( realign ) {
+                realignWithMafft();
+            }
+            if ( verbose ) {
+                System.out.println();
+            }
+        }
+        return msa_props;
+    }
+
     final private void removeViaLength( final int length,
                                         final int step,
                                         final boolean realign,
@@ -397,6 +450,19 @@ public class MsaCompactor {
         return mc;
     }
 
+    /** Runs the verbose stepwise sequence removal on msa and shows the collected MsaProperties in a Chart. */
+    public final static MsaCompactor chart( final Msa msa,
+                                            final boolean realign,
+                                            final boolean norm,
+                                            final String path_to_mafft ) throws IOException, InterruptedException {
+        final MsaCompactor mc = new MsaCompactor( msa );
+        if ( realign ) {
+            mc.setPathToMafft( path_to_mafft );
+        }
+        Chart.display( mc.chart( realign, norm, true ) );
+        return mc;
+    }
+
     public final static MsaCompactor removeWorstOffenders( final Msa msa,
                                                            final int worst_offenders_to_remove,
                                                            final int step,
@@ -413,14 +479,6 @@ public class MsaCompactor {
         return mc;
     }
 
-    private static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) {
-        final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
-        for( int c = from; c <= to; ++c ) {
-            stats.addValue( MsaMethods.calculateIdentityRatio( msa, c ) );
-        }
-        return stats;
-    }
-
     private final static void printTableHeader() {
         System.out.print( ForesterUtil.pad( "Id", 20, ' ', false ) );
         System.out.print( "\t" );
diff --git a/forester/java/src/org/forester/msa_compactor/MsaProperties.java b/forester/java/src/org/forester/msa_compactor/MsaProperties.java
new file mode 100644
index 0000000..57cc40c
--- /dev/null
+++ b/forester/java/src/org/forester/msa_compactor/MsaProperties.java
@@ -0,0 +1,69 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2014 Christian M.
Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+
+package org.forester.msa_compactor;
+
+import org.forester.msa.Msa;
+import org.forester.msa.MsaMethods;
+
+/** Immutable holder for four summary values of an MSA: sequence count, length, gap ratio, mean identity ratio. */
+public final class MsaProperties {
+    final private double _average_identity_ratio; // arithmetic mean of the per-column identity ratios
+    final private double _gap_ratio; // as returned by MsaMethods.calcGapRatio
+    final private int _length; // as returned by Msa.getLength
+    final private int _number_of_sequences; // as returned by Msa.getNumberOfSequences
+
+    public MsaProperties( final Msa msa ) {
+        this( msa.getNumberOfSequences(),
+              msa.getLength(),
+              MsaMethods.calcGapRatio( msa ),
+              MsaMethods.calculateIdentityRatio( 0, msa.getLength() - 1, msa ).arithmeticMean() );
+    }
+
+    public MsaProperties( final int number_of_sequences,
+                          final int length,
+                          final double gap_ratio,
+                          final double average_identity_ratio ) {
+        _average_identity_ratio = average_identity_ratio;
+        _gap_ratio = gap_ratio;
+        _length = length;
+        _number_of_sequences = number_of_sequences;
+    }
+
+    public final double getAverageIdentityRatio() {
+        return _average_identity_ratio;
+    }
+
+    public final double getGapRatio() {
+        return _gap_ratio;
+    }
+
+    public final int getLength() {
+        return _length;
+    }
+
+    public final int getNumberOfSequences() {
+        return _number_of_sequences;
+    }
+}
diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java
index 5ff446f..924b643 100644
--- a/forester/java/src/org/forester/test/Test.java
+++ b/forester/java/src/org/forester/test/Test.java
@@ -2,8 +2,8 @@
 // FORESTER -- software libraries and applications
 // for evolutionary biology research and applications.
 //
-// Copyright (C) 2008-2009 Christian M. Zmasek
-// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// Copyright (C) 2014 Christian M. Zmasek
+// Copyright (C) 2014 Sanford-Burnham Medical Research Institute
 // All rights reserved
 //
 // This library is free software; you can redistribute it and/or
@@ -20,7 +20,6 @@
 // License along with this library; if not, write to the Free Software
 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 //
-// Contact: phylosoft @ gmail . com
 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
 
 package org.forester.test;
-- 
1.7.10.2