From eccc2fdb674f76be1815fd7984295661bff8a2be Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 9 Feb 2011 01:09:13 +0000 Subject: [PATCH] initial commit --- .../org/forester/development/AbstractRenderer.java | 93 + .../org/forester/development/DevelopmentTools.java | 184 ++ .../src/org/forester/development/HmmerRest.java | 168 ++ .../src/org/forester/development/MsaRenderer.java | 408 ++++ .../src/org/forester/development/RandomThing.java | 17 + .../org/forester/development/ResidueRenderer.java | 235 ++ .../java/src/org/forester/development/Test.java | 91 + .../evoinference/TestPhylogenyReconstruction.java | 2359 ++++++++++++++++++++ .../evoinference/distance/NeighborJoining.java | 227 ++ .../distance/PairwiseDistanceCalculator.java | 181 ++ .../character/BasicCharacterStateMatrix.java | 538 +++++ .../matrix/character/CharacterStateMatrix.java | 138 ++ .../matrix/character/DiscreteState.java | 80 + .../distance/BasicSymmetricalDistanceMatrix.java | 183 ++ .../matrix/distance/DistanceMatrix.java | 47 + .../evoinference/parsimony/DolloParsimony.java | 396 ++++ .../evoinference/parsimony/FitchParsimony.java | 539 +++++ .../evoinference/parsimony/SankoffParsimony.java | 546 +++++ .../evoinference/tools/BootstrapResampler.java | 91 + 19 files changed, 6521 insertions(+) create mode 100644 forester/java/src/org/forester/development/AbstractRenderer.java create mode 100644 forester/java/src/org/forester/development/DevelopmentTools.java create mode 100644 forester/java/src/org/forester/development/HmmerRest.java create mode 100644 forester/java/src/org/forester/development/MsaRenderer.java create mode 100644 forester/java/src/org/forester/development/RandomThing.java create mode 100644 forester/java/src/org/forester/development/ResidueRenderer.java create mode 100644 forester/java/src/org/forester/development/Test.java create mode 100644 forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java create mode 100644 forester/java/src/org/forester/evoinference/distance/NeighborJoining.java create mode 100644 forester/java/src/org/forester/evoinference/distance/PairwiseDistanceCalculator.java create mode 100644 forester/java/src/org/forester/evoinference/matrix/character/BasicCharacterStateMatrix.java create mode 100644 forester/java/src/org/forester/evoinference/matrix/character/CharacterStateMatrix.java create mode 100644 forester/java/src/org/forester/evoinference/matrix/character/DiscreteState.java create mode 100644 forester/java/src/org/forester/evoinference/matrix/distance/BasicSymmetricalDistanceMatrix.java create mode 100644 forester/java/src/org/forester/evoinference/matrix/distance/DistanceMatrix.java create mode 100644 forester/java/src/org/forester/evoinference/parsimony/DolloParsimony.java create mode 100644 forester/java/src/org/forester/evoinference/parsimony/FitchParsimony.java create mode 100644 forester/java/src/org/forester/evoinference/parsimony/SankoffParsimony.java create mode 100644 forester/java/src/org/forester/evoinference/tools/BootstrapResampler.java diff --git a/forester/java/src/org/forester/development/AbstractRenderer.java b/forester/java/src/org/forester/development/AbstractRenderer.java new file mode 100644 index 0000000..dbea137 --- /dev/null +++ b/forester/java/src/org/forester/development/AbstractRenderer.java @@ -0,0 +1,93 @@ +// $Id: +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.development; + +import java.awt.Color; +import java.awt.Graphics; + +import javax.swing.JComponent; + +public abstract class AbstractRenderer extends JComponent { + + /** + * + */ + private static final long serialVersionUID = 7236434322552764776L; + static final Color DEFAULT_COLOR = new Color( 0, 0, 0 ); + static final Color MARKED_COLOR = new Color( 255, 255, 0 ); + static final Color USER_FLAGGED_COLOR = new Color( 255, 0, 255 ); + static final Color SELECTED_COLOR = new Color( 255, 0, 0 ); + int _x; + int _y; + int _well_size; + byte _status; + + public AbstractRenderer() { + } + + abstract MsaRenderer getParentPlateRenderer(); + + byte getStatus() { + return _status; + } + + int getWellSize() { + return _well_size; + } + + @Override + public int getX() { + return _x; + } + + @Override + public int getY() { + return _y; + } + + abstract boolean isSelected(); + + @Override + public abstract void paint( Graphics g ); + + abstract void setIsSelected( boolean flag ); + + void setStatus( final byte status ) { + _status = status; + } + + void setWellSize( final int well_size ) { + _well_size = well_size; + } + + void setX( final int x ) { + _x = x; + } + + void setY( final int y ) { + _y = y; + } +} diff --git a/forester/java/src/org/forester/development/DevelopmentTools.java b/forester/java/src/org/forester/development/DevelopmentTools.java new file mode 100644 index 0000000..d41194a --- /dev/null +++ b/forester/java/src/org/forester/development/DevelopmentTools.java @@ -0,0 +1,184 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.development; + +import java.util.Random; + +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyMethods; +import org.forester.phylogeny.PhylogenyNode; + +public final class DevelopmentTools { + + /** + * Creates a completely unbalanced Phylogeny with i external nodes. + * + * @return a newly created unbalanced Phylogeny + */ + // public static Phylogeny createUnbalancedTree( int i ) { + // + // Phylogeny t1 = null; + // + // try { + // PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + // t1 = factory.create( ":S=", new SimpleNHXParser() ); + // + // t1.setRooted( true ); + // + // for ( int j = 1; j < i; ++j ) { + // t1.addNodeAndConnect( "", "" ); + // } + // t1.setRoot( t1.getFirstExternalNode().getRoot() ); + // t1.calculateRealHeight(); + // } + // + // catch ( PhylogenyParserException e ) { + // System.err + // .println( "Unexpected exception during \"createUnbalancedTree\":" ); + // System.err.println( e.toString() ); + // System.exit( -1 ); + // } + // + // return t1; + // } + private DevelopmentTools() { + } + + /** + * Creates a completely balanced rooted phylogeny with a given number of levels and + * children per node. + * + * @param levels + * @param children_per_node + * @return a completely balanced rooted phylogeny + */ + public static Phylogeny createBalancedPhylogeny( final int levels, final int children_per_node ) { + final PhylogenyNode root = new PhylogenyNode(); + final Phylogeny p = new Phylogeny(); + p.setRoot( root ); + p.setRooted( true ); + DevelopmentTools.createBalancedPhylogenyRecursion( levels, children_per_node, root ); + return p; + } + + private static void createBalancedPhylogenyRecursion( int current_level, + final int children_per_node, + final PhylogenyNode current_node ) { + if ( current_level > 0 ) { + --current_level; + for( int i = 0; i < children_per_node; ++i ) { + final PhylogenyNode new_node = new PhylogenyNode(); + current_node.addAsChild( new_node ); + DevelopmentTools.createBalancedPhylogenyRecursion( current_level, children_per_node, new_node ); + } + } + } + + /** + * Sets the species name of the external Nodes of Phylogeny t to 1, 1+i, 2, + * 2+i, 3, 3+i, .... Examples: i=2: 1, 3, 2, 4 i=4: 1, 5, 2, 6, 3, 7, 4, 8 + * i=8: 1, 9, 2, 10, 3, 11, 4, 12, ... + */ + public static void intervalNumberSpecies( final Phylogeny t, final int i ) { + if ( ( t == null ) || t.isEmpty() ) { + return; + } + PhylogenyNode n = t.getFirstExternalNode(); + int j = 1; + boolean odd = true; + while ( n != null ) { + if ( odd ) { + PhylogenyMethods.setScientificName( n, j + "" ); + } + else { + PhylogenyMethods.setScientificName( n, ( j + i ) + "" ); + j++; + } + odd = !odd; + n = n.getNextExternalNode(); + } + } + + /** + * Sets the species namea of the external Nodes of Phylogeny t to descending + * integers, ending with 1. + */ + public static void numberSpeciesInDescOrder( final Phylogeny t ) { + if ( ( t == null ) || t.isEmpty() ) { + return; + } + PhylogenyNode n = t.getFirstExternalNode(); + int j = t.getRoot().getNumberOfExternalNodes(); + while ( n != null ) { + PhylogenyMethods.setTaxonomyCode( n, j + "" ); + j--; + n = n.getNextExternalNode(); + } + } + + /** + * Sets the species namea of the external Nodes of Phylogeny t to ascending + * integers, starting with 1. + */ + public static void numberSpeciesInOrder( final Phylogeny t ) { + if ( ( t == null ) || t.isEmpty() ) { + return; + } + PhylogenyNode n = t.getFirstExternalNode(); + int j = 1; + while ( n != null ) { + PhylogenyMethods.setScientificName( n, j + "" ); + j++; + n = n.getNextExternalNode(); + } + } + + /** + * Sets the species names of the external Nodes of Phylogeny t to a random + * positive integer number between (and including) min and max. + * + * @param t + * whose external species names are to be randomized + * @param min + * minimal value for random numbers + * @param max + * maximum value for random numbers + */ + public static void randomizeSpecies( final int min, final int max, final Phylogeny t ) { + if ( ( t == null ) || t.isEmpty() ) { + return; + } + final int mi = Math.abs( min ); + final int ma = Math.abs( max ); + final Random r = new Random(); + PhylogenyNode n = t.getFirstExternalNode(); + while ( n != null ) { + final String code = ( ( Math.abs( r.nextInt() ) % ( ma - mi + 1 ) ) + mi ) + ""; + PhylogenyMethods.setTaxonomyCode( n, code ); + n = n.getNextExternalNode(); + } + } +} \ No newline at end of file diff --git a/forester/java/src/org/forester/development/HmmerRest.java b/forester/java/src/org/forester/development/HmmerRest.java new file mode 100644 index 0000000..d9e23c9 --- /dev/null +++ b/forester/java/src/org/forester/development/HmmerRest.java @@ -0,0 +1,168 @@ +// $Id: +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.development; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.PrintStream; +import java.net.URL; +import java.net.URLConnection; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +public class HmmerRest { + + final static String LIST_SEPARATOR = "%0A"; + final static String LINE_SEPARATOR = "\n"; + private final static String BASE_URL = "http://pfam.sanger.ac.uk/search/sequence"; + + public static void main( final String[] args ) { + final String seq = "MASTENNEKDNFMRDTASRSKKSRRRSLWIAAGAVPTAIALSLSLASPA" + + "AVAQSSFGSSDIIDSGVLDSITRGLTDYLTPRDEALPAGEVTYPAIEGLP" + + "AGVRVNSAEYVTSHHVVLSIQSAAMPERPIKVQLLLPRDWYSSPDRDFPE" + + "IWALDGLRAIEKQSGWTIETNIEQFFADKNAIVVLPVGGESSFYTDWNEP" + + "NNGKNYQWETFLTEELAPILDKGFRSNGERAITGISMGGTAAVNIATHNP" + + "EMFNFVGSFSGYLDTTSNGMPAAIGAALADAGGYNVNAMWGPAGSERWLE" + + "NDPKRNVDQLRGKQVYVSAGSGADDYGQDGSVATGPANAAGVGLELISRM" + + "TSQTFVDAANGAGVNVIANFRPSGVHAWPYWQFEMTQAWPYMADSLGMSR" + + "EDRGADCVALGAIADATADGSLGSCLNNEYLVANGVGRAQDFTNGRAYWS" + + "PNTGAFGLFGRINARYSELGGPDSWLGFPKTRELSTPDGRGRYVHFENGS" + + "IYWSAATGPWEIPGDMFTAWGTQGYEAGGLGYPVGPAKDFNGGLAQEFQG" + + "GYVLRTPQNRAYWVRGAISAKYMEPGVATTLGFPTGNERLIPGGAFQEFT" + + "NGNIYWSASTGAHYILRGGIFDAWGAKGYEQGEYGWPTTDQTSIAAGGET" + "ITFQNGTIRQVNGRIEESR"; + final String query = "seq=" + seq + "" + "&" + "output=xml"; + String result = ""; + try { + result = getResult( query ); + } + catch ( final IOException e ) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + System.out.println( result ); + final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + Document dom = null; + try { + //Using factory get an instance of document builder + final DocumentBuilder db = dbf.newDocumentBuilder(); + //parse using builder to get DOM representation of the XML file + dom = db.parse( new ByteArrayInputStream( result.getBytes() ) ); + } + catch ( final ParserConfigurationException pce ) { + pce.printStackTrace(); + } + catch ( final SAXException se ) { + se.printStackTrace(); + } + catch ( final IOException ioe ) { + ioe.printStackTrace(); + } + final Element docEle = dom.getDocumentElement(); + final NodeList nl = docEle.getElementsByTagName( "job" ); + String result_url = ""; + for( int i = 0; i < nl.getLength(); i++ ) { + //System.out.println( nl.item( i ) ); + result_url = getTextValue( ( Element ) nl.item( i ), "result_url" ); + } + System.out.println( "result url = " + result_url ); + //gettin the result.... + try { + //do what you want to do before sleeping + Thread.sleep( 5000 );//sleep for x ms + //do what you want to do after sleeptig + } + catch ( final InterruptedException ie ) { + ie.printStackTrace(); + } + try { + result = getResult( result_url, "" ); + } + catch ( final IOException e ) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + System.out.println( result ); + } + + private static String getTextValue( final Element ele, final String tagName ) { + String textVal = null; + final NodeList nl = ele.getElementsByTagName( tagName ); + if ( ( nl != null ) && ( nl.getLength() > 0 ) ) { + final Element el = ( Element ) nl.item( 0 ); + textVal = el.getFirstChild().getNodeValue(); + } + return textVal; + } + + public static String getResult( final String base_url, final String query ) throws IOException { + System.out.println( query ); + final URL url = new URL( base_url ); + final URLConnection urlc = url.openConnection(); + urlc.setDoOutput( true ); + urlc.setAllowUserInteraction( false ); + final PrintStream ps = new PrintStream( urlc.getOutputStream() ); + //System.out.println( "query: " + query ); + ps.print( query.trim() ); + ps.close(); + final BufferedReader br = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) ); + final StringBuffer sb = new StringBuffer(); + String line = null; + while ( ( line = br.readLine() ) != null ) { + sb.append( line + LINE_SEPARATOR ); + } + br.close(); + return sb.toString().trim(); + } + + public static String getResult( final String query ) throws IOException { + System.out.println( query ); + final URL url = new URL( BASE_URL ); + final URLConnection urlc = url.openConnection(); + urlc.setDoOutput( true ); + urlc.setAllowUserInteraction( false ); + final PrintStream ps = new PrintStream( urlc.getOutputStream() ); + //System.out.println( "query: " + query ); + ps.print( query.trim() ); + ps.close(); + final BufferedReader br = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) ); + final StringBuffer sb = new StringBuffer(); + String line = null; + while ( ( line = br.readLine() ) != null ) { + sb.append( line + LINE_SEPARATOR ); + } + br.close(); + return sb.toString().trim(); + } +} diff --git a/forester/java/src/org/forester/development/MsaRenderer.java b/forester/java/src/org/forester/development/MsaRenderer.java new file mode 100644 index 0000000..6ee7715 --- /dev/null +++ b/forester/java/src/org/forester/development/MsaRenderer.java @@ -0,0 +1,408 @@ +// $Id: +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.development; + +import java.awt.Color; +import java.awt.Dimension; +import java.awt.Graphics; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.awt.event.MouseMotionAdapter; + +import javax.swing.JComponent; + +import org.forester.msa.Msa; + +public class MsaRenderer extends JComponent { + + private static final long serialVersionUID = -68078011081748093L; + + public static boolean isMouseEventAltered( final MouseEvent event ) { + return event.isShiftDown() || event.isAltDown() || event.isControlDown() || event.isAltGraphDown() + || event.isMetaDown(); + } + //private PlateDisplayPanel _heatMapPanel; + private final int _rows; + private final int _columns; + private int _wellSize; + private AbstractRenderer _wells[][]; + private double _min; + private double _max; + private double _mean; + private Color _minColor; + private Color _maxColor; + private Color _meanColor; + private boolean _useMean; + // private Rubberband _rubberband; + private final Msa _msa; + private final JComponent _parent; + + public MsaRenderer( final Msa msa, final int unit_size, final JComponent parent ) { + _parent = parent; + _msa = msa; + _rows = _msa.getNumberOfSequences(); + _columns = _msa.getLength(); + setWellSize( unit_size ); + addMouseListeners(); + initializeWells(); + //setRubberband( new RubberbandRectangle( this ) ); + } + + private void addMouseListeners() { + addMouseMotionListener( new MouseMotionAdapter() { + + @Override + public void mouseDragged( final MouseEvent event ) { + // if ( ( ( event.getModifiers() & 0x10 ) != 0 ) && getRubberband().isActive() + // && !PlateRenderer.isMouseEventAltered( event ) ) { + // getRubberband().stretch( event.getPoint() ); + // } + } + } ); + addMouseListener( new MouseAdapter() { + + @Override + public void mouseClicked( final MouseEvent event ) { + // if ( ( ( event.getModifiers() & 4 ) != 0 ) || PlateRenderer.isMouseEventAltered( event ) ) { + // getInfo( new Rectangle( event.getX(), event.getY(), 1, 1 ) ); + // } + // else { + // changeSelected( new Rectangle( event.getX(), event.getY(), 1, 1 ), true ); + // event.consume(); + // } + } + + @Override + public void mousePressed( final MouseEvent event ) { + // if ( ( ( event.getModifiers() & 4 ) != 0 ) || PlateRenderer.isMouseEventAltered( event ) ) { + // getInfo( new Rectangle( event.getX(), event.getY(), 1, 1 ) ); + // } + // if ( ( ( event.getModifiers() & 0x10 ) != 0 ) && getRubberband().isActive() ) { + // getRubberband().anchor( event.getPoint() ); + // } + } + + @Override + public void mouseReleased( final MouseEvent event ) { + // if ( ( ( event.getModifiers() & 0x10 ) != 0 ) && getRubberband().isActive() ) { + // getRubberband().end( event.getPoint() ); + // rubberbandEnded( getRubberband() ); + // } + } + } ); + } + + public AbstractRenderer getAbstractRenderer( final int row, final int col ) { + return _wells[ row ][ col ]; + } + + public int getColumns() { + return _columns; + } + + private double getMax() { + return _max; + } + + private Color getMaxColor() { + return _maxColor; + } + + private double getMean() { + return _mean; + } + + private Color getMeanColor() { + return _meanColor; + } + + private double getMin() { + return _min; + } + + private Color getMinColor() { + return _minColor; + } + + @Override + public Dimension getPreferredSize() { + final int width = ( getWellSize() + 1 ) * ( getColumns() + 1 ); + final int hight = ( getWellSize() + 1 ) * ( getRows() + 1 ) + 30; + return new Dimension( width, hight ); + } + + public int getRows() { + return _rows; + } + + // private Rubberband getRubberband() { + // return _rubberband; + // } + private int getWellSize() { + return _wellSize; + } + + private void initializeWells() { + _wells = new AbstractRenderer[ getRows() + 1 ][ getColumns() + 1 ]; + for( int row = 0; row < getRows(); row++ ) { + for( int col = 0; col < getColumns() + 1; col++ ) { + AbstractRenderer r; + if ( col == getColumns() ) { + // r = new LabelRenderer( PlateData.ALPHABET[ row % PlateData.ALPHABET.length ] + "", this ); + } + //else if ( getPlateData().getData( row, col ) == null ) { + // r = new WellRenderer( new WellData(), this ); + // } + else { + r = new ResidueRenderer( getMsa().getResidueAt( row, col ), this ); + } + // r.setVisible( true ); + // setAbstractRenderer( r, row, col ); + } + } + for( int col = 0; col < getColumns() + 1; col++ ) { + // AbstractRenderer r; + if ( col == getColumns() ) { + // r = new LabelRenderer( "", this ); + } + else { + // r = new LabelRenderer( ( col + 1 ) + "", this ); + } + // r.setVisible( true ); + // setAbstractRenderer( r, getRows(), col ); + } + } + + private Msa getMsa() { + return _msa; + } + + public void inverseMarkedOfWell( final int well_row, final int well_col ) { + final ResidueRenderer rend = ( ResidueRenderer ) getAbstractRenderer( well_row, well_col ); + if ( rend.isMarked() ) { + rend.setIsMarked( false ); + } + else { + rend.setIsMarked( true ); + } + } + + private boolean isUseMean() { + return _useMean; + } + + @Override + public void paint( final Graphics g ) { + g.setColor( Color.white ); + // g.setFont( getPlateDisplayPanel().getPlateTitleFont() ); + // g + // .drawString( "Number:" + getPlateData().getName() + " Replicate:" + // + ( getPlateData().getReplicateNumber() + 1 ), 10, 20 ); + for( int row = 0; row < getRows() + 1; row++ ) { + for( int col = 0; col < getColumns() + 1; col++ ) { + getAbstractRenderer( row, col ).paint( g ); + } + } + } + + public void resetWellColors() { + for( int row = 0; row < getRows(); row++ ) { + for( int col = 0; col < getColumns(); col++ ) { + final ResidueRenderer r = ( ResidueRenderer ) getAbstractRenderer( row, col ); + if ( isUseMean() ) { + r.resetWellColor( getMin(), getMax(), getMean(), getMinColor(), getMaxColor(), getMeanColor() ); + } + else { + r.resetWellColor( getMin(), getMax(), getMinColor(), getMaxColor() ); + } + } + } + } + + public void resetWellSize( final int well_size ) { + setWellSize( well_size ); + final int factor = well_size + 0; + for( int row = 0; row < getRows() + 1; row++ ) { + for( int col = 0; col < getColumns() + 1; col++ ) { + final AbstractRenderer r = getAbstractRenderer( row, col ); + r.setX( 10 + factor * col ); + r.setY( factor * row + 30 ); + r.setWellSize( well_size ); + } + } + } + + // private void rubberbandEnded( final Rubberband rb ) { + // changeSelected( rb.getBounds(), false ); + // repaint(); + // } + private void setAbstractRenderer( final AbstractRenderer ar, final int row, final int col ) { + _wells[ row ][ col ] = ar; + } + + public void setFlaggedStatusOfOutlierWells( final boolean set_flagged_to_this ) { + for( int row = 0; row < getRows(); row++ ) { + for( int col = 0; col < getColumns(); col++ ) { + final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); + // if ( wr.isFlaggedByStatisticalAnalysis() ) { + // wr.setIsUserFlagged( set_flagged_to_this ); + // } + } + } + } + + public void setFlaggedStatusOfSelectedWells( final boolean set_flagged_to_this ) { + for( int row = 0; row < getRows(); row++ ) { + for( int col = 0; col < getColumns(); col++ ) { + final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); + if ( wr.isSelected() ) { + // wr.setIsUserFlagged( set_flagged_to_this ); + wr.setIsSelected( false ); + } + } + } + } + + public void setIsFlaggingStatusChangedToFalse() { + for( int row = 0; row < getRows(); row++ ) { + for( int col = 0; col < getColumns(); col++ ) { + final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); + // wr.setIsFlaggingStatusChanged( false ); + } + } + } + + private void setIsSelectedOfAll( final boolean isSelected ) { + for( int col = 0; col < getColumns() + 1; col++ ) { + setIsSelectedOfColumn( col, isSelected ); + } + } + + private void setIsSelectedOfColumn( final int column, final boolean isSelected ) { + for( int row = 0; row < getRows() + 1; row++ ) { + getAbstractRenderer( row, column ).setIsSelected( isSelected ); + } + } + + private void setIsSelectedOfRow( final int row, final boolean isSelected ) { + for( int col = 0; col < getColumns() + 1; col++ ) { + getAbstractRenderer( row, col ).setIsSelected( isSelected ); + } + } + + private void setIsSelectedOfRowAlternating( final int row, final boolean even ) { + boolean selected = even; + for( int col = 0; col < getColumns(); col++ ) { + getAbstractRenderer( row, col ).setIsSelected( selected ); + selected = !selected; + } + } + + private void setIsSelectedToQuarter( final int quarter ) { + boolean evenRow = false; + boolean evenColumn = false; + if ( quarter <= 1 ) { + evenRow = true; + evenColumn = true; + } + else if ( quarter == 2 ) { + evenColumn = true; + } + else if ( quarter == 3 ) { + evenRow = true; + } + for( int row = 0; row < getRows(); row++ ) { + if ( evenColumn ) { + setIsSelectedOfRowAlternating( row, evenRow ); + } + else { + setIsSelectedOfRow( row, false ); + } + evenColumn = !evenColumn; + } + } + + public void setMarkedOfAllWellsToFalse() { + for( int row = 0; row < getRows(); row++ ) { + for( int col = 0; col < getColumns(); col++ ) { + final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); + // rend.setIsMarked( false ); + } + } + } + + public void setMax( final double max ) { + _max = max; + } + + void setMaxColor( final Color maxColor ) { + _maxColor = maxColor; + } + + void setMean( final double mean ) { + _mean = mean; + } + + public void setMeanColor( final Color meanColor ) { + _meanColor = meanColor; + } + + public void setMin( final double min ) { + _min = min; + } + + void setMinColor( final Color minColor ) { + _minColor = minColor; + } + + // private void setRubberband( final Rubberband rb ) { + // if ( _rubberband != null ) { + // _rubberband.setActive( false ); + // } + // _rubberband = rb; + // if ( _rubberband != null ) { + // _rubberband.setComponent( this ); + // _rubberband.setActive( true ); + // } + // } + public void setUseMean( final boolean useMean ) { + _useMean = useMean; + } + + private void setWellSize( final int wellSize ) { + _wellSize = wellSize; + } + + public void unSelectUnMarkAll() { + for( int row = 0; row < getRows(); row++ ) { + for( int col = 0; col < getColumns(); col++ ) { + final ResidueRenderer wr = ( ResidueRenderer ) getAbstractRenderer( row, col ); + wr.setIsSelected( false ); + wr.setIsMarked( false ); + } + } + } +} diff --git a/forester/java/src/org/forester/development/RandomThing.java b/forester/java/src/org/forester/development/RandomThing.java new file mode 100644 index 0000000..c19631c --- /dev/null +++ b/forester/java/src/org/forester/development/RandomThing.java @@ -0,0 +1,17 @@ + +package org.forester.development; + +import java.util.Random; + +public class RandomThing { + + public static void main( final String[] args ) { + final Random rg = new Random(); + for( int i = 0; i < 200; i++ ) { + for( int j = 0; j < 30; j++ ) { + System.out.print( "\t" + rg.nextFloat() ); + } + System.out.println(); + } + } +} diff --git a/forester/java/src/org/forester/development/ResidueRenderer.java b/forester/java/src/org/forester/development/ResidueRenderer.java new file mode 100644 index 0000000..da8088c --- /dev/null +++ b/forester/java/src/org/forester/development/ResidueRenderer.java @@ -0,0 +1,235 @@ +// $Id: +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.development; + +import java.awt.Color; +import java.awt.Graphics; +import java.awt.Graphics2D; +import java.awt.RenderingHints; + +public class ResidueRenderer extends AbstractRenderer { + + static final Color EMPTY_COLOR = new Color( 250, 0, 250 ); + static final Color POSITIVE_COLOR = new Color( 250, 0, 250 ); + static final Color NEGATIVE_COLOR = new Color( 250, 0, 250 ); + static final Color NULL_COLOR = new Color( 50, 50, 50 ); + static final int DISTANCE_OVAL_BORDER = 1; + static final int SIZE_LIMIT = 7; + /** + * + */ + private static final long serialVersionUID = -2331160296913478874L; + private final char _value; + private Color _wellColor; + private boolean _isMarked; + private boolean _isSelected; + private final MsaRenderer _parentPlateRenderer; + + public ResidueRenderer( final char value, final MsaRenderer parentPlateRenderer ) { + _value = value; + _parentPlateRenderer = parentPlateRenderer; + setIsSelected( false ); + setIsMarked( false ); + setStatus( ( byte ) 0 ); + } + + private double calcFactor( final double min, final double max ) { + return ( max - min ) / 255D; + } + + private Color calcWellColor( double value, + final double min, + final double max, + final Color minColor, + final Color maxColor ) { + if ( value < min ) { + value = min; + } + if ( value > max ) { + value = max; + } + final double x = ( 255D * ( value - min ) ) / ( max - min ); + final int red = ( int ) ( minColor.getRed() + x * calcFactor( minColor.getRed(), maxColor.getRed() ) ); + final int green = ( int ) ( minColor.getGreen() + x * calcFactor( minColor.getGreen(), maxColor.getGreen() ) ); + final int blue = ( int ) ( minColor.getBlue() + x * calcFactor( minColor.getBlue(), maxColor.getBlue() ) ); + return new Color( red, green, blue ); + } + + private Color calcWellColor( double value, + final double min, + final double max, + final double mean, + final Color minColor, + final Color maxColor, + final Color meanColor ) { + // if ( isEmpty() ) { + // return ResidueRenderer.NULL_COLOR; + // } + if ( meanColor == null ) { + return calcWellColor( value, min, max, minColor, maxColor ); + } + if ( value < min ) { + value = min; + } + if ( value > max ) { + value = max; + } + if ( value < mean ) { + final double x = ( 255D * ( value - min ) ) / ( mean - min ); + final int red = ( int ) ( minColor.getRed() + x * calcFactor( minColor.getRed(), meanColor.getRed() ) ); + final int green = ( int ) ( minColor.getGreen() + x + * calcFactor( minColor.getGreen(), meanColor.getGreen() ) ); + final int blue = ( int ) ( minColor.getBlue() + x * calcFactor( minColor.getBlue(), meanColor.getBlue() ) ); + return new Color( red, green, blue ); + } + if ( value > mean ) { + final double x = ( 255D * ( value - mean ) ) / ( max - mean ); + final int red = ( int ) ( meanColor.getRed() + x * calcFactor( meanColor.getRed(), maxColor.getRed() ) ); + final int green = ( int ) ( meanColor.getGreen() + x + * calcFactor( meanColor.getGreen(), maxColor.getGreen() ) ); + final int blue = ( int ) ( meanColor.getBlue() + x * calcFactor( meanColor.getBlue(), maxColor.getBlue() ) ); + return new Color( red, green, blue ); + } + else { + return meanColor; + } + } + + public double getDataValue() { + return _value; + } + + @Override + public MsaRenderer getParentPlateRenderer() { + return _parentPlateRenderer; + } + + public Color getWellColor() { + return _wellColor; + } + + public boolean isMarked() { + return _isMarked; + } + + @Override + public boolean isSelected() { + return _isSelected; + } + + @Override + public void paint( final Graphics g ) { + final int width = getWellSize() - 1; + final int width_ = width - 1; + final int width__ = ( width_ - 1 ) + 1; + final int width__s = width__ - 2; + final int x_ = getX() + 1; + final int y_ = getY() + 1; + // final PlateDisplayPanel hmp = getParentPlateRenderer() + // .getPlateDisplayPanel(); + // boolean draw_circle = hmp.isDrawCircle() + // || ( !hmp.isDrawCircle() && !hmp.isDrawSquare() && ( width > 7 ) ); + // final boolean show_user_flags = _parentPlateRenderer + // .getPlateDisplayPanel().showUserFlagsCheckBox.isSelected(); + // final boolean show_outlier_flags = _parentPlateRenderer + // .getPlateDisplayPanel().showOutliersCheckBox.isSelected(); + // final boolean show_hit_picks = _parentPlateRenderer + // .getPlateDisplayPanel().showHitPicksCheckBox.isSelected(); + // final boolean show_confirmed_hits = _parentPlateRenderer + // .getPlateDisplayPanel().showConfirmedHitsCheckBox.isSelected(); + final Graphics2D g2 = ( Graphics2D ) g; + g2.setRenderingHint( RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_SPEED ); + if ( isMarked() ) { + g2.setColor( AbstractRenderer.MARKED_COLOR ); + } + // else if ( !draw_circle && isSelected() ) { + // g2.setColor( AbstractRenderer.SELECTED_COLOR ); + // } + else { + g2.setColor( AbstractRenderer.DEFAULT_COLOR ); + } + g2.drawRect( getX(), getY(), width, width ); + // if ( draw_circle ) { + // if ( isSelected() && isMarked() ) { + // g2.setColor( AbstractRenderer.MARKED_COLOR ); + // } + // else if ( isSelected() ) { + // g2.setColor( AbstractRenderer.SELECTED_COLOR ); + // } + // else { + // g2.setColor( AbstractRenderer.DEFAULT_COLOR ); + // } + // g2.fillRect( x_, y_, width_, width_ ); + // } + g2.setColor( getWellColor() ); + // if ( draw_circle ) { + // g2.setRenderingHint( RenderingHints.KEY_ANTIALIASING, + // RenderingHints.VALUE_ANTIALIAS_ON ); + // if ( isSelected() && ( width > 6 ) ) { + // g2.fillOval( getX() + 2, getY() + 2, width__s, width__s ); + // } + // else if ( width < 5 ) { + // g2.fillOval( ( getX() + 1 ) - 1, ( getY() + 1 ) - 1, + // width__ + 2, width__ + 2 ); + // } + // else { + // g2.fillOval( getX() + 1, getY() + 1, width__, width__ ); + // } + // } + if ( isMarked() || isSelected() ) { + g2.fillRect( getX() + 1, getY() + 1, width_, width_ ); + } + else { + g2.fillRect( ( getX() + 1 ) - 1, ( getY() + 1 ) - 1, width_ + 2, width_ + 2 ); + } + } + + public void resetWellColor( final double min, final double max, final Color minColor, final Color maxColor ) { + setWellColor( calcWellColor( getDataValue(), min, max, minColor, maxColor ) ); + } + + public void resetWellColor( final double min, + final double max, + final double mean, + final Color minColor, + final Color maxColor, + final Color meanColor ) { + setWellColor( calcWellColor( getDataValue(), min, max, mean, minColor, maxColor, meanColor ) ); + } + + public void setIsMarked( final boolean isMarked ) { + _isMarked = isMarked; + } + + @Override + public void setIsSelected( final boolean isSelected ) { + _isSelected = isSelected; + } + + private void setWellColor( final Color wellColor ) { + _wellColor = wellColor; + } +} diff --git a/forester/java/src/org/forester/development/Test.java b/forester/java/src/org/forester/development/Test.java new file mode 100644 index 0000000..5011938 --- /dev/null +++ b/forester/java/src/org/forester/development/Test.java @@ -0,0 +1,91 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.development; + +import java.io.File; +import java.util.Date; +import java.util.Locale; + +import org.forester.util.ForesterUtil; + +/* + * * + */ +public class Test { + + private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + + "test_data" + ForesterUtil.getFileSeparator(); + + public static void main( final String[] args ) { + System.out.println( "[Java version: " + ForesterUtil.JAVA_VERSION + " " + ForesterUtil.JAVA_VENDOR + "]" ); + System.out.println( "[OS: " + ForesterUtil.OS_NAME + " " + ForesterUtil.OS_ARCH + " " + ForesterUtil.OS_VERSION + + "]" ); + Locale.setDefault( Locale.US ); + System.out.println( "[Locale: " + Locale.getDefault() + "]" ); + final int failed = 0; + final int succeeded = 0; + System.out.print( "[Test if directory with files for testing exists/is readable: " ); + if ( Test.testDir( PATH_TO_TEST_DATA ) ) { + System.out.println( "OK.]" ); + } + else { + System.out.println( "could not find/read from directory \"" + PATH_TO_TEST_DATA + "\".]" ); + System.out.println( "Testing aborted." ); + System.exit( -1 ); + } + final long start_time = new Date().getTime(); + System.out.println( "\nTime requirement: " + ( new Date().getTime() - start_time ) + "ms." ); + System.out.println(); + System.out.println( "Successful tests: " + succeeded ); + System.out.println( "Failed tests: " + failed ); + System.out.println(); + if ( failed < 1 ) { + System.out.println( "OK." ); + } + else { + System.out.println( "Not OK." ); + } + } + + private static boolean testDir( final String file ) { + try { + final File f = new File( file ); + if ( !f.exists() ) { + return false; + } + if ( !f.isDirectory() ) { + return false; + } + if ( !f.canRead() ) { + return false; + } + } + catch ( final Exception e ) { + return false; + } + return true; + } +} diff --git a/forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java b/forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java new file mode 100644 index 0000000..5849ac1 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java @@ -0,0 +1,2359 @@ +// $Id: +// $ +// +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference; + +import java.io.File; +import java.io.FileInputStream; +import java.io.StringWriter; +import java.util.Date; +import java.util.List; + +import org.forester.evoinference.distance.NeighborJoining; +import org.forester.evoinference.distance.PairwiseDistanceCalculator; +import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; +import org.forester.evoinference.matrix.character.CharacterStateMatrix; +import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; +import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; +import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; +import org.forester.evoinference.matrix.distance.DistanceMatrix; +import org.forester.evoinference.parsimony.DolloParsimony; +import org.forester.evoinference.parsimony.FitchParsimony; +import org.forester.io.parsers.GeneralMsaParser; +import org.forester.io.parsers.SymmetricalDistanceMatrixParser; +import org.forester.io.parsers.nhx.NHXParser; +import org.forester.msa.Msa; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.util.ForesterUtil; + +public class TestPhylogenyReconstruction { + + private final static double ZERO_DIFF = 1.0E-9; + private final static boolean TIME = false; + + public static boolean isEqual( final double a, final double b ) { + return ( ( Math.abs( a - b ) ) < ZERO_DIFF ); + } + + public static void main( final String[] args ) { + timeNeighborJoining(); + } + + public static boolean test( final File test_dir ) { + System.out.print( " Basic symmetrical distance matrix: " ); + if ( !testBasicSymmetricalDistanceMatrix() ) { + System.out.println( "failed." ); + return false; + } + System.out.println( "OK." ); + System.out.print( " Basic character state matrix: " ); + if ( !testBasicCharacterStateMatrix() ) { + System.out.println( "failed." ); + return false; + } + System.out.println( "OK." ); + System.out.print( " Symmetrical distance matrix parser: " ); + if ( !testSymmetricalDistanceMatrixParser() ) { + System.out.println( "failed." ); + return false; + } + System.out.println( "OK." ); + System.out.print( " Distance Calculation: " ); + if ( !testDistanceCalculationMethods( test_dir ) ) { + System.out.println( "failed." ); + return false; + } + System.out.println( "OK." ); + System.out.print( " Neighbor Joining: " ); + if ( !testNeighborJoining() ) { + System.out.println( "failed." ); + return false; + } + System.out.println( "OK." ); + System.out.print( " Dollo Parsimony: " ); + if ( !testDolloParsimony() ) { + System.out.println( "failed." ); + return false; + } + System.out.println( "OK." ); + System.out.print( " Dollo Parsimony on non binary trees: " ); + if ( !testDolloParsimonyOnNonBinaryTree() ) { + System.out.println( "failed." ); + return false; + } + System.out.println( "OK." ); + System.out.print( " Fitch Parsimony: " ); + if ( !testFitchParsimony() ) { + System.out.println( "failed." ); + return false; + } + System.out.println( "OK." ); + return true; + } + + private static boolean testDistanceCalculationMethods( final File test_dir ) { + try { + final Msa msa0 = GeneralMsaParser.parse( new FileInputStream( test_dir + ForesterUtil.FILE_SEPARATOR + + "bcl.aln" ) ); + final BasicSymmetricalDistanceMatrix pwd0 = PairwiseDistanceCalculator.calcKimuraDistances( msa0 ); + if ( pwd0.getSize() != 120 ) { + return false; + } + for( int i = 0; i < pwd0.getSize(); ++i ) { + if ( !isEqual( pwd0.getValue( i, i ), 0.0 ) ) { + return false; + } + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testBasicCharacterStateMatrix() { + try { + final CharacterStateMatrix matrix_0 = new BasicCharacterStateMatrix( 4, 8 ); + final CharacterStateMatrix matrix_00 = new BasicCharacterStateMatrix( 4, 8 ); + matrix_0.setIdentifier( 0, "A" ); + matrix_0.setIdentifier( 1, "B" ); + matrix_0.setIdentifier( 2, "C" ); + matrix_0.setIdentifier( 3, "D" ); + matrix_0.setCharacter( 0, "0" ); + matrix_0.setCharacter( 1, "1" ); + matrix_0.setCharacter( 2, "2" ); + matrix_0.setCharacter( 3, "3" ); + matrix_0.setCharacter( 4, "4" ); + matrix_0.setCharacter( 5, "5" ); + matrix_0.setCharacter( 6, "6" ); + matrix_0.setCharacter( 7, "7" ); + matrix_00.setIdentifier( 0, "A" ); + matrix_00.setIdentifier( 1, "B" ); + matrix_00.setIdentifier( 2, "C" ); + matrix_00.setIdentifier( 3, "D" ); + matrix_00.setCharacter( 3, "3" ); + matrix_00.setCharacter( 4, "4" ); + if ( !matrix_0.getCharacter( 1 ).equals( "1" ) ) { + return false; + } + if ( !matrix_0.getIdentifier( 0 ).equals( "A" ) ) { + return false; + } + matrix_0.setState( 0, 0, "00" ); + matrix_00.setState( 0, 0, "00" ); + if ( !matrix_0.getState( 0, 0 ).equals( "00" ) ) { + return false; + } + matrix_0.setState( 0, 1, "01" ); + matrix_00.setState( 0, 1, "01" ); + if ( !matrix_0.getState( 0, 1 ).equals( "01" ) ) { + return false; + } + matrix_0.setState( 1, 1, "11" ); + matrix_00.setState( 1, 1, "11" ); + if ( !matrix_0.getState( 1, 1 ).equals( "11" ) ) { + return false; + } + matrix_0.setState( 1, 0, "10" ); + matrix_00.setState( 1, 0, "10" ); + if ( !matrix_0.getState( 1, 0 ).equals( "10" ) ) { + return false; + } + matrix_0.setState( 1, 2, "12" ); + matrix_00.setState( 1, 2, "12" ); + if ( !matrix_0.getState( 1, 2 ).equals( "12" ) ) { + return false; + } + matrix_0.setState( 3, 7, "37" ); + matrix_00.setState( 3, 7, "37" ); + if ( !matrix_0.getState( 3, 7 ).equals( "37" ) ) { + return false; + } + matrix_0.setState( 2, 6, "26" ); + matrix_00.setState( 2, 6, "26" ); + if ( !matrix_0.getState( 2, 6 ).equals( "26" ) ) { + return false; + } + matrix_0.setState( "D", "3", "33" ); + matrix_00.setState( "D", "3", "33" ); + if ( !matrix_0.getState( 3, 3 ).equals( "33" ) ) { + return false; + } + if ( !matrix_0.getState( "D", "3" ).equals( "33" ) ) { + return false; + } + matrix_0.setState( "C", "4", "24" ); + matrix_00.setState( "C", "4", "24" ); + if ( !matrix_0.getState( 2, 4 ).equals( "24" ) ) { + return false; + } + if ( !matrix_0.getState( "C", "4" ).equals( "24" ) ) { + return false; + } + if ( matrix_0.isEmpty() ) { + return false; + } + if ( matrix_0.getNumberOfIdentifiers() != 4 ) { + return false; + } + if ( matrix_0.getNumberOfCharacters() != 8 ) { + return false; + } + if ( !matrix_0.equals( matrix_0 ) ) { + return false; + } + if ( !matrix_0.equals( matrix_00 ) ) { + return false; + } + matrix_00.setState( "C", "4", "123" ); + if ( matrix_0.equals( matrix_00 ) ) { + return false; + } + final Integer[][] ints = { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 } }; + final CharacterStateMatrix matrix_000 = new BasicCharacterStateMatrix( ints ); + matrix_000.toString(); + if ( matrix_000.getNumberOfCharacters() != 4 ) { + return false; + } + if ( matrix_000.getNumberOfIdentifiers() != 3 ) { + return false; + } + if ( matrix_000.getState( 0, 1 ) != 2 ) { + return false; + } + if ( matrix_000.getState( 2, 3 ) != 12 ) { + return false; + } + final Integer[][] ints0 = { { 1, 2, 3, 4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 } }; + final CharacterStateMatrix matrix_0000 = new BasicCharacterStateMatrix( ints0 ); + if ( !matrix_000.equals( matrix_0000 ) ) { + return false; + } + final Integer[][] ints00 = { { 1, 2, 3, -4 }, { 5, 6, 7, 8 }, { 9, 10, 11, 12 } }; + final CharacterStateMatrix matrix_00000 = new BasicCharacterStateMatrix( ints00 ); + if ( matrix_000.equals( matrix_00000 ) ) { + return false; + } + final CharacterStateMatrix clone0 = matrix_0.copy(); + final CharacterStateMatrix clone00 = matrix_00.copy(); + if ( !clone0.equals( matrix_0 ) ) { + return false; + } + if ( !clone00.equals( matrix_00 ) ) { + return false; + } + if ( clone00.equals( clone0 ) ) { + return false; + } + final CharacterStateMatrix pivot0 = matrix_0.pivot(); + final CharacterStateMatrix pivot00 = matrix_00.pivot(); + if ( !pivot0.getState( 1, 0 ).equals( "01" ) ) { + return false; + } + if ( !pivot0.getState( 6, 2 ).equals( "26" ) ) { + return false; + } + if ( !matrix_0.getState( 2, 6 ).equals( "26" ) ) { + return false; + } + final CharacterStateMatrix pivotpivot00 = pivot00.pivot(); + if ( !pivotpivot00.equals( matrix_00 ) ) { + return false; + } + final CharacterStateMatrix nex = new BasicCharacterStateMatrix( 4, 3 ); + nex.setIdentifier( 0, "amphioxus" ); + nex.setIdentifier( 1, "sponge" ); + nex.setIdentifier( 2, "sea_anemone" ); + nex.setIdentifier( 3, "cobra" ); + nex.setCharacter( 0, "notch" ); + nex.setCharacter( 1, "homeobox" ); + nex.setCharacter( 2, "wnt" ); + nex.setState( 0, 0, BinaryStates.ABSENT ); + nex.setState( 0, 1, BinaryStates.ABSENT ); + nex.setState( 0, 2, BinaryStates.ABSENT ); + nex.setState( 1, 0, BinaryStates.PRESENT ); + nex.setState( 1, 1, BinaryStates.PRESENT ); + nex.setState( 1, 2, BinaryStates.ABSENT ); + nex.setState( 2, 0, BinaryStates.PRESENT ); + nex.setState( 2, 1, BinaryStates.PRESENT ); + nex.setState( 2, 2, BinaryStates.PRESENT ); + nex.setState( 3, 0, BinaryStates.PRESENT ); + nex.setState( 3, 1, BinaryStates.ABSENT ); + nex.setState( 3, 2, BinaryStates.ABSENT ); + StringWriter w = new StringWriter(); + nex.toWriter( w, CharacterStateMatrix.Format.NEXUS_BINARY ); + //System.out.println( w.getBuffer().toString() ); + w = new StringWriter(); + nex.pivot().toWriter( w, CharacterStateMatrix.Format.NEXUS_BINARY ); + //System.out.println( w.getBuffer().toString() ); + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testBasicSymmetricalDistanceMatrix() { + try { + final DistanceMatrix matrix_0 = new BasicSymmetricalDistanceMatrix( 4 ); + matrix_0.setIdentifier( 0, "A" ); + matrix_0.setIdentifier( 1, "B" ); + matrix_0.setIdentifier( 2, "C" ); + matrix_0.setIdentifier( 3, "0123456789012" ); + matrix_0.setValue( 1, 0, 0.00001 ); + matrix_0.setValue( 0, 2, 0.0000009 ); + matrix_0.setValue( 3, 0, 3.0 ); + matrix_0.setValue( 1, 2, 4.0 ); + matrix_0.setValue( 3, 1, 5.0 ); + matrix_0.setValue( 2, 3, 6.0 ); + if ( !matrix_0.getIdentifier( 0 ).equals( "A" ) ) { + return false; + } + if ( !matrix_0.getIdentifier( 1 ).equals( "B" ) ) { + return false; + } + if ( !matrix_0.getIdentifier( 2 ).equals( "C" ) ) { + return false; + } + if ( !matrix_0.getIdentifier( 3 ).equals( "0123456789012" ) ) { + return false; + } + if ( matrix_0.getSize() != 4 ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 0, 0 ), 0.0 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 3, 3 ), 0.0 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 0, 1 ), 0.00001 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 0, 2 ), 0.0000009 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 0, 3 ), 3 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 1, 0 ), 0.00001 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 1, 2 ), 4 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 1, 3 ), 5 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 2, 0 ), 0.0000009 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 2, 1 ), 4 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 2, 3 ), 6 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 3, 0 ), 3 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 3, 1 ), 5 ) ) { + return false; + } + if ( !isEqual( matrix_0.getValue( 3, 2 ), 6 ) ) { + return false; + } + final StringBuffer matrix_0_phylip = new StringBuffer(); + matrix_0_phylip.append( " 4" ); + matrix_0_phylip.append( ForesterUtil.LINE_SEPARATOR ); + matrix_0_phylip.append( "A 0.000000 0.000010 0.000001 3.000000" ); + matrix_0_phylip.append( ForesterUtil.LINE_SEPARATOR ); + matrix_0_phylip.append( "B 0.000010 0.000000 4.000000 5.000000" ); + matrix_0_phylip.append( ForesterUtil.LINE_SEPARATOR ); + matrix_0_phylip.append( "C 0.000001 4.000000 0.000000 6.000000" ); + matrix_0_phylip.append( ForesterUtil.LINE_SEPARATOR ); + matrix_0_phylip.append( "0123456789 3.000000 5.000000 6.000000 0.000000" ); + if ( !matrix_0_phylip.toString() + .equals( matrix_0.toStringBuffer( DistanceMatrix.Format.PHYLIP ).toString() ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testDolloParsimony() { + try { + final BinaryStates PRESENT = BinaryStates.PRESENT; + final BinaryStates ABSENT = BinaryStates.ABSENT; + final GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; + final DolloParsimony dollo1 = DolloParsimony.createInstance(); + final PhylogenyFactory factory1 = ParserBasedPhylogenyFactory.getInstance(); + final String p1_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h)gh)ah,i)r"; + final Phylogeny p1 = factory1.create( p1_str, new NHXParser() )[ 0 ]; + CharacterStateMatrix m1 = new BasicCharacterStateMatrix( 9, + 1 ); + m1.setIdentifier( 0, "a" ); + m1.setIdentifier( 1, "b" ); + m1.setIdentifier( 2, "c" ); + m1.setIdentifier( 3, "d" ); + m1.setIdentifier( 4, "e" ); + m1.setIdentifier( 5, "f" ); + m1.setIdentifier( 6, "g" ); + m1.setIdentifier( 7, "h" ); + m1.setIdentifier( 8, "i" ); + m1.setCharacter( 0, "0" ); + m1.setState( "a", "0", PRESENT ); + m1.setState( "b", "0", ABSENT ); + m1.setState( "c", "0", PRESENT ); + m1.setState( "d", "0", ABSENT ); + m1.setState( "e", "0", ABSENT ); + m1.setState( "f", "0", ABSENT ); + m1.setState( "g", "0", ABSENT ); + m1.setState( "h", "0", ABSENT ); + m1.setState( "i", "0", ABSENT ); + dollo1.execute( p1, m1 ); + if ( dollo1.getTotalGains() != 1 ) { + return false; + } + if ( dollo1.getTotalLosses() != 1 ) { + return false; + } + if ( dollo1.getTotalUnchanged() != 15 ) { + return false; + } + m1.setState( "b", "0", PRESENT ); + dollo1.execute( p1, m1 ); + if ( dollo1.getTotalGains() != 1 ) { + return false; + } + if ( dollo1.getTotalLosses() != 0 ) { + return false; + } + if ( dollo1.getTotalUnchanged() != 16 ) { + return false; + } + m1.setState( "b", "0", ABSENT ); + m1.setState( "e", "0", PRESENT ); + dollo1.execute( p1, m1 ); + if ( dollo1.getTotalGains() != 1 ) { + return false; + } + if ( dollo1.getTotalLosses() != 3 ) { + return false; + } + if ( dollo1.getTotalUnchanged() != 13 ) { + return false; + } + m1.setState( "a", "0", ABSENT ); + m1.setState( "c", "0", ABSENT ); + m1.setState( "g", "0", PRESENT ); + dollo1.setReturnInternalStates( true ); + dollo1.setReturnGainLossMatrix( true ); + dollo1.execute( p1, m1 ); + if ( dollo1.getTotalGains() != 1 ) { + return false; + } + if ( dollo1.getTotalLosses() != 3 ) { + return false; + } + if ( dollo1.getTotalUnchanged() != 13 ) { + return false; + } + final DolloParsimony dollo2 = DolloParsimony.createInstance(); + final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); + final String p2_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h,i)gi)ai,((j,k,l)jl,(m,n,o)mo,(p,q,r)pr)jr)root"; + final Phylogeny p2 = factory2.create( p2_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m2 = new BasicCharacterStateMatrix( 18, + 4 ); + m2.setIdentifier( 0, "a" ); + m2.setIdentifier( 1, "b" ); + m2.setIdentifier( 2, "c" ); + m2.setIdentifier( 3, "d" ); + m2.setIdentifier( 4, "e" ); + m2.setIdentifier( 5, "f" ); + m2.setIdentifier( 6, "g" ); + m2.setIdentifier( 7, "h" ); + m2.setIdentifier( 8, "i" ); + m2.setIdentifier( 9, "j" ); + m2.setIdentifier( 10, "k" ); + m2.setIdentifier( 11, "l" ); + m2.setIdentifier( 12, "m" ); + m2.setIdentifier( 13, "n" ); + m2.setIdentifier( 14, "o" ); + m2.setIdentifier( 15, "p" ); + m2.setIdentifier( 16, "q" ); + m2.setIdentifier( 17, "r" ); + m2.setCharacter( 0, "0" ); + m2.setCharacter( 1, "1" ); + m2.setCharacter( 2, "2" ); + m2.setCharacter( 3, "3" ); + m2.setState( "a", "0", PRESENT ); + m2.setState( "b", "0", ABSENT ); + m2.setState( "c", "0", PRESENT ); + m2.setState( "d", "0", ABSENT ); + m2.setState( "e", "0", ABSENT ); + m2.setState( "f", "0", ABSENT ); + m2.setState( "g", "0", ABSENT ); + m2.setState( "h", "0", ABSENT ); + m2.setState( "i", "0", ABSENT ); + m2.setState( "j", "0", ABSENT ); + m2.setState( "k", "0", ABSENT ); + m2.setState( "l", "0", ABSENT ); + m2.setState( "m", "0", ABSENT ); + m2.setState( "n", "0", ABSENT ); + m2.setState( "o", "0", ABSENT ); + m2.setState( "p", "0", ABSENT ); + m2.setState( "q", "0", ABSENT ); + m2.setState( "r", "0", ABSENT ); + m2.setState( "a", "1", PRESENT ); + m2.setState( "b", "1", ABSENT ); + m2.setState( "c", "1", PRESENT ); + m2.setState( "d", "1", ABSENT ); + m2.setState( "e", "1", ABSENT ); + m2.setState( "f", "1", ABSENT ); + m2.setState( "g", "1", PRESENT ); + m2.setState( "h", "1", ABSENT ); + m2.setState( "i", "1", ABSENT ); + m2.setState( "j", "1", PRESENT ); + m2.setState( "k", "1", ABSENT ); + m2.setState( "l", "1", ABSENT ); + m2.setState( "m", "1", PRESENT ); + m2.setState( "n", "1", ABSENT ); + m2.setState( "o", "1", ABSENT ); + m2.setState( "p", "1", ABSENT ); + m2.setState( "q", "1", ABSENT ); + m2.setState( "r", "1", ABSENT ); + m2.setState( "a", "2", ABSENT ); + m2.setState( "b", "2", ABSENT ); + m2.setState( "c", "2", ABSENT ); + m2.setState( "d", "2", ABSENT ); + m2.setState( "e", "2", ABSENT ); + m2.setState( "f", "2", ABSENT ); + m2.setState( "g", "2", ABSENT ); + m2.setState( "h", "2", ABSENT ); + m2.setState( "i", "2", ABSENT ); + m2.setState( "j", "2", PRESENT ); + m2.setState( "k", "2", ABSENT ); + m2.setState( "l", "2", ABSENT ); + m2.setState( "m", "2", PRESENT ); + m2.setState( "n", "2", ABSENT ); + m2.setState( "o", "2", ABSENT ); + m2.setState( "p", "2", PRESENT ); + m2.setState( "q", "2", ABSENT ); + m2.setState( "r", "2", ABSENT ); + m2.setState( "a", "3", ABSENT ); + m2.setState( "b", "3", ABSENT ); + m2.setState( "c", "3", PRESENT ); + m2.setState( "d", "3", ABSENT ); + m2.setState( "e", "3", ABSENT ); + m2.setState( "f", "3", ABSENT ); + m2.setState( "g", "3", PRESENT ); + m2.setState( "h", "3", ABSENT ); + m2.setState( "i", "3", ABSENT ); + m2.setState( "j", "3", ABSENT ); + m2.setState( "k", "3", ABSENT ); + m2.setState( "l", "3", ABSENT ); + m2.setState( "m", "3", ABSENT ); + m2.setState( "n", "3", ABSENT ); + m2.setState( "o", "3", ABSENT ); + m2.setState( "p", "3", ABSENT ); + m2.setState( "q", "3", ABSENT ); + m2.setState( "r", "3", ABSENT ); + dollo2.setReturnInternalStates( true ); + dollo2.setReturnGainLossMatrix( true ); + dollo2.execute( p2, m2 ); + final CharacterStateMatrix i_m = dollo2.getInternalStatesMatrix(); + final CharacterStateMatrix gl_m = dollo2.getGainLossMatrix(); + if ( dollo2.getTotalGains() != 3 ) { + return false; + } + if ( dollo2.getTotalLosses() != 22 ) { + return false; + } + if ( dollo2.getTotalUnchanged() != 95 ) { + return false; + } + if ( i_m.getState( "ab", "0" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "ac", "0" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "ad", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "af", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ef", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ai", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "gi", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "jl", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "mo", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "pr", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "jr", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "root", "0" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ab", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "ac", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "ad", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "af", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "ef", "1" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ai", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "gi", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "jl", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "mo", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "pr", "1" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "jr", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "root", "1" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "ab", "2" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ac", "2" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ad", "2" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "af", "2" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ef", "2" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ai", "2" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "gi", "2" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "jl", "2" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "mo", "2" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "pr", "2" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "jr", "2" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "root", "2" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ab", "3" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ac", "3" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "ad", "3" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "af", "3" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "ef", "3" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "ai", "3" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "gi", "3" ) != PRESENT ) { + return false; + } + if ( i_m.getState( "jl", "3" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "mo", "3" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "pr", "3" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "jr", "3" ) != ABSENT ) { + return false; + } + if ( i_m.getState( "root", "3" ) != ABSENT ) { + return false; + } + if ( gl_m.getState( "a", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + final DolloParsimony dollo9 = DolloParsimony.createInstance(); + final PhylogenyFactory factory9 = ParserBasedPhylogenyFactory.getInstance(); + final String p9_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h)gh)ah,i)r"; + final Phylogeny p9 = factory9.create( p9_str, new NHXParser() )[ 0 ]; + m1 = new BasicCharacterStateMatrix( 9, 3 ); + m1.setIdentifier( 0, "a" ); + m1.setIdentifier( 1, "b" ); + m1.setIdentifier( 2, "c" ); + m1.setIdentifier( 3, "d" ); + m1.setIdentifier( 4, "e" ); + m1.setIdentifier( 5, "f" ); + m1.setIdentifier( 6, "g" ); + m1.setIdentifier( 7, "h" ); + m1.setIdentifier( 8, "i" ); + m1.setState( 0, 0, PRESENT ); + m1.setState( 1, 0, ABSENT ); + m1.setState( 2, 0, PRESENT ); + m1.setState( 3, 0, ABSENT ); + m1.setState( 4, 0, ABSENT ); + m1.setState( 5, 0, ABSENT ); + m1.setState( 6, 0, ABSENT ); + m1.setState( 7, 0, ABSENT ); + m1.setState( 8, 0, ABSENT ); + m1.setState( 0, 1, PRESENT ); + m1.setState( 1, 1, PRESENT ); + m1.setState( 2, 1, PRESENT ); + m1.setState( 3, 1, PRESENT ); + m1.setState( 4, 1, ABSENT ); + m1.setState( 5, 1, ABSENT ); + m1.setState( 6, 1, ABSENT ); + m1.setState( 7, 1, ABSENT ); + m1.setState( 8, 1, ABSENT ); + m1.setState( 0, 2, PRESENT ); + m1.setState( 1, 2, ABSENT ); + m1.setState( 2, 2, ABSENT ); + m1.setState( 3, 2, ABSENT ); + m1.setState( 4, 2, ABSENT ); + m1.setState( 5, 2, ABSENT ); + m1.setState( 6, 2, ABSENT ); + m1.setState( 7, 2, PRESENT ); + m1.setState( 8, 2, ABSENT ); + dollo9.execute( p9, m1 ); + if ( dollo9.getTotalGains() != 3 ) { + return false; + } + if ( dollo9.getTotalLosses() != 6 ) { + return false; + } + final DolloParsimony dollo10 = DolloParsimony.createInstance(); + final PhylogenyFactory factory10 = ParserBasedPhylogenyFactory.getInstance(); + final String p10_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h)gh)ah,i)r"; + final Phylogeny p10 = factory10.create( p10_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m10 = new BasicCharacterStateMatrix( 9, + 1 ); + m10.setIdentifier( 0, "a" ); + m10.setIdentifier( 1, "b" ); + m10.setIdentifier( 2, "c" ); + m10.setIdentifier( 3, "d" ); + m10.setIdentifier( 4, "e" ); + m10.setIdentifier( 5, "f" ); + m10.setIdentifier( 6, "g" ); + m10.setIdentifier( 7, "h" ); + m10.setIdentifier( 8, "i" ); + m10.setState( 0, 0, PRESENT ); + m10.setState( 1, 0, ABSENT ); + m10.setState( 2, 0, PRESENT ); + m10.setState( 3, 0, ABSENT ); + m10.setState( 4, 0, ABSENT ); + m10.setState( 5, 0, ABSENT ); + m10.setState( 6, 0, ABSENT ); + m10.setState( 7, 0, ABSENT ); + m10.setState( 8, 0, ABSENT ); + dollo10.execute( p10, m10 ); + if ( dollo10.getTotalGains() != 1 ) { + return false; + } + if ( dollo10.getTotalLosses() != 1 ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testDolloParsimonyOnNonBinaryTree() { + try { + final BinaryStates PRESENT = BinaryStates.PRESENT; + final BinaryStates ABSENT = BinaryStates.ABSENT; + final DolloParsimony dollo1 = DolloParsimony.createInstance(); + final PhylogenyFactory factory1 = ParserBasedPhylogenyFactory.getInstance(); + final String p1_str = "((((((a,b,y)aby,c)ac,d)ad,(e,f)ef)af,(g,h)gh)ah,i)r"; + final Phylogeny p1 = factory1.create( p1_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m1 = new BasicCharacterStateMatrix( 10, + 1 ); + m1.setIdentifier( 0, "a" ); + m1.setIdentifier( 1, "b" ); + m1.setIdentifier( 2, "y" ); + m1.setIdentifier( 3, "c" ); + m1.setIdentifier( 4, "d" ); + m1.setIdentifier( 5, "e" ); + m1.setIdentifier( 6, "f" ); + m1.setIdentifier( 7, "g" ); + m1.setIdentifier( 8, "h" ); + m1.setIdentifier( 9, "i" ); + m1.setCharacter( 0, "0" ); + m1.setState( "a", "0", PRESENT ); + m1.setState( "b", "0", ABSENT ); + m1.setState( "y", "0", PRESENT ); + m1.setState( "c", "0", PRESENT ); + m1.setState( "d", "0", ABSENT ); + m1.setState( "e", "0", ABSENT ); + m1.setState( "f", "0", ABSENT ); + m1.setState( "g", "0", ABSENT ); + m1.setState( "h", "0", ABSENT ); + m1.setState( "i", "0", ABSENT ); + dollo1.execute( p1, m1 ); + if ( dollo1.getTotalGains() != 1 ) { + return false; + } + if ( dollo1.getTotalLosses() != 1 ) { + return false; + } + if ( dollo1.getTotalUnchanged() != 16 ) { + return false; + } + m1.setState( "b", "0", PRESENT ); + dollo1.execute( p1, m1 ); + if ( dollo1.getTotalGains() != 1 ) { + return false; + } + if ( dollo1.getTotalLosses() != 0 ) { + return false; + } + if ( dollo1.getTotalUnchanged() != 17 ) { + return false; + } + m1.setState( "a", "0", ABSENT ); + m1.setState( "b", "0", ABSENT ); + dollo1.execute( p1, m1 ); + if ( dollo1.getTotalGains() != 1 ) { + return false; + } + if ( dollo1.getTotalLosses() != 2 ) { + return false; + } + if ( dollo1.getTotalUnchanged() != 15 ) { + return false; + } + m1.setState( "y", "0", ABSENT ); + dollo1.execute( p1, m1 ); + if ( dollo1.getTotalGains() != 1 ) { + return false; + } + if ( dollo1.getTotalLosses() != 0 ) { + return false; + } + if ( dollo1.getTotalUnchanged() != 17 ) { + return false; + } + final DolloParsimony dollo2 = DolloParsimony.createInstance(); + final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); + final String p2_str = "((((((a,b,y)aby,c,d)cad,e,f)af,(g,h)gh)ah,i))r"; + final Phylogeny p2 = factory2.create( p2_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m2 = new BasicCharacterStateMatrix( 10, + 1 ); + m2.setIdentifier( 0, "a" ); + m2.setIdentifier( 1, "b" ); + m2.setIdentifier( 2, "y" ); + m2.setIdentifier( 3, "c" ); + m2.setIdentifier( 4, "d" ); + m2.setIdentifier( 5, "e" ); + m2.setIdentifier( 6, "f" ); + m2.setIdentifier( 7, "g" ); + m2.setIdentifier( 8, "h" ); + m2.setIdentifier( 9, "i" ); + m2.setCharacter( 0, "0" ); + m2.setState( "a", "0", PRESENT ); + m2.setState( "b", "0", ABSENT ); + m2.setState( "y", "0", PRESENT ); + m2.setState( "c", "0", PRESENT ); + m2.setState( "d", "0", ABSENT ); + m2.setState( "e", "0", ABSENT ); + m2.setState( "f", "0", ABSENT ); + m2.setState( "g", "0", ABSENT ); + m2.setState( "h", "0", ABSENT ); + m2.setState( "i", "0", ABSENT ); + dollo2.setReturnInternalStates( true ); + dollo2.execute( p2, m2 ); + CharacterStateMatrix i_m2 = dollo2.getInternalStatesMatrix(); + if ( i_m2.getState( "aby", "0" ) != PRESENT ) { + return false; + } + if ( i_m2.getState( "cad", "0" ) != PRESENT ) { + return false; + } + if ( i_m2.getState( "af", "0" ) != ABSENT ) { + return false; + } + if ( i_m2.getState( "gh", "0" ) != ABSENT ) { + return false; + } + if ( i_m2.getState( "ah", "0" ) != ABSENT ) { + return false; + } + if ( i_m2.getState( "r", "0" ) != ABSENT ) { + return false; + } + if ( dollo2.getTotalGains() != 1 ) { + return false; + } + if ( dollo2.getTotalLosses() != 2 ) { + return false; + } + if ( dollo2.getTotalUnchanged() != 14 ) { + return false; + } + m2.setState( "b", "0", PRESENT ); + dollo2.execute( p2, m2 ); + if ( dollo2.getTotalGains() != 1 ) { + return false; + } + if ( dollo2.getTotalLosses() != 1 ) { + return false; + } + if ( dollo2.getTotalUnchanged() != 15 ) { + return false; + } + m2.setState( "a", "0", ABSENT ); + m2.setState( "b", "0", ABSENT ); + dollo2.execute( p2, m2 ); + if ( dollo2.getTotalGains() != 1 ) { + return false; + } + if ( dollo2.getTotalLosses() != 3 ) { + return false; + } + if ( dollo2.getTotalUnchanged() != 13 ) { + return false; + } + m2.setState( "y", "0", ABSENT ); + dollo2.execute( p2, m2 ); + if ( dollo2.getTotalGains() != 1 ) { + return false; + } + if ( dollo2.getTotalLosses() != 0 ) { + return false; + } + if ( dollo2.getTotalUnchanged() != 16 ) { + return false; + } + m2.setState( "c", "0", ABSENT ); + dollo2.execute( p2, m2 ); + if ( dollo2.getTotalGains() != 0 ) { + return false; + } + if ( dollo2.getTotalLosses() != 0 ) { + return false; + } + if ( dollo2.getTotalUnchanged() != 17 ) { + return false; + } + m2.setState( "y", "0", PRESENT ); + m2.setState( "e", "0", PRESENT ); + dollo2.execute( p2, m2 ); + if ( dollo2.getTotalGains() != 1 ) { + return false; + } + if ( dollo2.getTotalLosses() != 5 ) { + return false; + } + if ( dollo2.getTotalUnchanged() != 11 ) { + return false; + } + i_m2 = dollo2.getInternalStatesMatrix(); + if ( i_m2.getState( "aby", "0" ) != PRESENT ) { + return false; + } + if ( i_m2.getState( "cad", "0" ) != PRESENT ) { + return false; + } + if ( i_m2.getState( "af", "0" ) != PRESENT ) { + return false; + } + if ( i_m2.getState( "gh", "0" ) != ABSENT ) { + return false; + } + if ( i_m2.getState( "ah", "0" ) != ABSENT ) { + return false; + } + if ( i_m2.getState( "r", "0" ) != ABSENT ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testFitchParsimony() { + try { + final BinaryStates PRESENT = BinaryStates.PRESENT; + final BinaryStates ABSENT = BinaryStates.ABSENT; + final GainLossStates GAIN = GainLossStates.GAIN; + final GainLossStates LOSS = GainLossStates.LOSS; + final GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; + final GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT; + final FitchParsimony fitch1 = new FitchParsimony(); + final PhylogenyFactory factory1 = ParserBasedPhylogenyFactory.getInstance(); + final String p1_str = "((((((a,b)ab,c)ac,d)ad,(e,f)ef)af,(g,h,i)gi)ai,((j,k,l)jl,(m,n,o)mo,(p,q,r)pr)jr)root"; + final Phylogeny p1 = factory1.create( p1_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m1 = new BasicCharacterStateMatrix( 18, 1 ); + m1.setIdentifier( 0, "a" ); + m1.setIdentifier( 1, "b" ); + m1.setIdentifier( 2, "c" ); + m1.setIdentifier( 3, "d" ); + m1.setIdentifier( 4, "e" ); + m1.setIdentifier( 5, "f" ); + m1.setIdentifier( 6, "g" ); + m1.setIdentifier( 7, "h" ); + m1.setIdentifier( 8, "i" ); + m1.setIdentifier( 9, "j" ); + m1.setIdentifier( 10, "k" ); + m1.setIdentifier( 11, "l" ); + m1.setIdentifier( 12, "m" ); + m1.setIdentifier( 13, "n" ); + m1.setIdentifier( 14, "o" ); + m1.setIdentifier( 15, "p" ); + m1.setIdentifier( 16, "q" ); + m1.setIdentifier( 17, "r" ); + m1.setCharacter( 0, "0" ); + m1.setState( "a", "0", "A" ); + m1.setState( "b", "0", "A" ); + m1.setState( "c", "0", "B" ); + m1.setState( "d", "0", "C" ); + m1.setState( "e", "0", "D" ); + m1.setState( "f", "0", "A" ); + m1.setState( "g", "0", "A" ); + m1.setState( "h", "0", "B" ); + m1.setState( "i", "0", "C" ); + m1.setState( "j", "0", "A" ); + m1.setState( "k", "0", "B" ); + m1.setState( "l", "0", "C" ); + m1.setState( "m", "0", "B" ); + m1.setState( "n", "0", "B" ); + m1.setState( "o", "0", "B" ); + m1.setState( "p", "0", "A" ); + m1.setState( "q", "0", "C" ); + m1.setState( "r", "0", "D" ); + fitch1.setReturnInternalStates( true ); + fitch1.setReturnGainLossMatrix( false ); + fitch1.setRandomize( false ); + fitch1.execute( p1, m1 ); + final CharacterStateMatrix i_m = fitch1.getInternalStatesMatrix(); + final CharacterStateMatrix> i_m_all = fitch1.getInternalStatesMatrixPriorToTraceback(); + if ( fitch1.getCost() != 10 ) { + return false; + } + if ( !i_m.getState( "ab", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m.getState( "ac", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m.getState( "ad", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m.getState( "ef", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m.getState( "ai", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m.getState( "gi", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m.getState( "jl", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m.getState( "mo", "0" ).equals( "B" ) ) { + return false; + } + if ( !i_m.getState( "pr", "0" ).equals( "A" ) ) { + return false; + } + if ( i_m_all.getState( "ab", "0" ).size() != 1 ) { + return false; + } + if ( !i_m_all.getState( "ab", "0" ).contains( "A" ) ) { + return false; + } + if ( i_m_all.getState( "ac", "0" ).size() != 2 ) { + return false; + } + if ( !i_m_all.getState( "ac", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all.getState( "ac", "0" ).contains( "B" ) ) { + return false; + } + if ( i_m_all.getState( "ad", "0" ).size() != 3 ) { + return false; + } + if ( !i_m_all.getState( "ad", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all.getState( "ad", "0" ).contains( "B" ) ) { + return false; + } + if ( !i_m_all.getState( "ad", "0" ).contains( "C" ) ) { + return false; + } + if ( i_m_all.getState( "af", "0" ).size() != 1 ) { + return false; + } + if ( !i_m_all.getState( "af", "0" ).contains( "A" ) ) { + return false; + } + if ( i_m_all.getState( "ef", "0" ).size() != 2 ) { + return false; + } + if ( !i_m_all.getState( "ef", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all.getState( "ef", "0" ).contains( "D" ) ) { + return false; + } + if ( i_m_all.getState( "gi", "0" ).size() != 3 ) { + return false; + } + if ( !i_m_all.getState( "gi", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all.getState( "gi", "0" ).contains( "B" ) ) { + return false; + } + if ( !i_m_all.getState( "gi", "0" ).contains( "C" ) ) { + return false; + } + if ( i_m_all.getState( "ai", "0" ).size() != 1 ) { + return false; + } + if ( !i_m_all.getState( "ai", "0" ).contains( "A" ) ) { + return false; + } + if ( i_m_all.getState( "jl", "0" ).size() != 3 ) { + return false; + } + if ( !i_m_all.getState( "jl", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all.getState( "jl", "0" ).contains( "B" ) ) { + return false; + } + if ( !i_m_all.getState( "jl", "0" ).contains( "C" ) ) { + return false; + } + if ( i_m_all.getState( "mo", "0" ).size() != 1 ) { + return false; + } + if ( !i_m_all.getState( "mo", "0" ).contains( "B" ) ) { + return false; + } + if ( i_m_all.getState( "pr", "0" ).size() != 3 ) { + return false; + } + if ( !i_m_all.getState( "pr", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all.getState( "pr", "0" ).contains( "C" ) ) { + return false; + } + if ( !i_m_all.getState( "pr", "0" ).contains( "D" ) ) { + return false; + } + if ( i_m_all.getState( "jr", "0" ).size() != 4 ) { + return false; + } + if ( !i_m_all.getState( "jr", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all.getState( "jr", "0" ).contains( "B" ) ) { + return false; + } + if ( !i_m_all.getState( "jr", "0" ).contains( "C" ) ) { + return false; + } + if ( !i_m_all.getState( "jr", "0" ).contains( "D" ) ) { + return false; + } + final FitchParsimony fitch2 = new FitchParsimony(); + final PhylogenyFactory factory2 = ParserBasedPhylogenyFactory.getInstance(); + final String p2_str = "((a,b)ab,(c,(d,e)de)cde)r"; + final Phylogeny p2 = factory2.create( p2_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m2 = new BasicCharacterStateMatrix( 5, 1 ); + m2.setIdentifier( 0, "a" ); + m2.setIdentifier( 1, "b" ); + m2.setIdentifier( 2, "c" ); + m2.setIdentifier( 3, "d" ); + m2.setIdentifier( 4, "e" ); + m2.setCharacter( 0, "0" ); + m2.setState( "a", "0", "C" ); + m2.setState( "b", "0", "A" ); + m2.setState( "c", "0", "C" ); + m2.setState( "d", "0", "A" ); + m2.setState( "e", "0", "G" ); + fitch2.setReturnInternalStates( true ); + fitch2.setReturnGainLossMatrix( false ); + fitch2.execute( p2, m2 ); + final CharacterStateMatrix i_m2 = fitch2.getInternalStatesMatrix(); + final CharacterStateMatrix> i_m_all2 = fitch2.getInternalStatesMatrixPriorToTraceback(); + if ( fitch2.getCost() != 3 ) { + return false; + } + if ( !i_m2.getState( "ab", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m2.getState( "de", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m2.getState( "cde", "0" ).equals( "A" ) ) { + return false; + } + if ( !i_m2.getState( "r", "0" ).equals( "A" ) ) { + return false; + } + if ( i_m_all2.getState( "cde", "0" ).size() != 3 ) { + return false; + } + if ( !i_m_all2.getState( "cde", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all2.getState( "cde", "0" ).contains( "C" ) ) { + return false; + } + if ( !i_m_all2.getState( "cde", "0" ).contains( "G" ) ) { + return false; + } + if ( i_m_all2.getState( "ab", "0" ).size() != 2 ) { + return false; + } + if ( !i_m_all2.getState( "ab", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all2.getState( "ab", "0" ).contains( "C" ) ) { + return false; + } + fitch2.setReturnInternalStates( true ); + fitch2.setReturnGainLossMatrix( false ); + fitch2.setUseLast( true ); + fitch2.execute( p2, m2 ); + final CharacterStateMatrix i_m21 = fitch2.getInternalStatesMatrix(); + final CharacterStateMatrix> i_m_all21 = fitch2.getInternalStatesMatrixPriorToTraceback(); + if ( fitch2.getCost() != 3 ) { + return false; + } + if ( !i_m21.getState( "ab", "0" ).equals( "C" ) ) { + return false; + } + if ( !i_m21.getState( "de", "0" ).equals( "G" ) ) { + return false; + } + if ( !i_m21.getState( "cde", "0" ).equals( "C" ) ) { + return false; + } + if ( !i_m21.getState( "r", "0" ).equals( "C" ) ) { + return false; + } + if ( i_m_all21.getState( "cde", "0" ).size() != 3 ) { + return false; + } + if ( !i_m_all21.getState( "cde", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all21.getState( "cde", "0" ).contains( "C" ) ) { + return false; + } + if ( !i_m_all21.getState( "cde", "0" ).contains( "G" ) ) { + return false; + } + final FitchParsimony fitch3 = new FitchParsimony(); + final PhylogenyFactory factory3 = ParserBasedPhylogenyFactory.getInstance(); + final String p3_str = "(((a,b)ab,((c,d)cd,e)cde)abcde,f)r"; + final Phylogeny p3 = factory3.create( p3_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m3 = new BasicCharacterStateMatrix( 6, 1 ); + m3.setIdentifier( 0, "a" ); + m3.setIdentifier( 1, "b" ); + m3.setIdentifier( 2, "c" ); + m3.setIdentifier( 3, "d" ); + m3.setIdentifier( 4, "e" ); + m3.setIdentifier( 5, "f" ); + m3.setCharacter( 0, "0" ); + m3.setState( "a", "0", "C" ); + m3.setState( "b", "0", "U" ); + m3.setState( "c", "0", "G" ); + m3.setState( "d", "0", "U" ); + m3.setState( "e", "0", "A" ); + m3.setState( "f", "0", "A" ); + fitch3.setReturnInternalStates( true ); + fitch3.setReturnGainLossMatrix( false ); + fitch3.execute( p3, m3 ); + final CharacterStateMatrix i_m3 = fitch3.getInternalStatesMatrix(); + final CharacterStateMatrix> i_m_all3 = fitch3.getInternalStatesMatrixPriorToTraceback(); + if ( fitch3.getCost() != 4 ) { + return false; + } + if ( !i_m3.getState( "ab", "0" ).equals( "U" ) ) { + return false; + } + if ( !i_m3.getState( "cd", "0" ).equals( "U" ) ) { + return false; + } + if ( !i_m3.getState( "cde", "0" ).equals( "U" ) ) { + return false; + } + if ( !i_m3.getState( "abcde", "0" ).equals( "U" ) ) { + return false; + } + if ( !i_m3.getState( "r", "0" ).equals( "A" ) ) { + return false; + } + if ( i_m_all3.getState( "cde", "0" ).size() != 3 ) { + return false; + } + if ( !i_m_all3.getState( "cde", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all3.getState( "cde", "0" ).contains( "G" ) ) { + return false; + } + if ( !i_m_all3.getState( "cde", "0" ).contains( "U" ) ) { + return false; + } + if ( i_m_all3.getState( "ab", "0" ).size() != 2 ) { + return false; + } + if ( !i_m_all3.getState( "ab", "0" ).contains( "C" ) ) { + return false; + } + if ( !i_m_all3.getState( "ab", "0" ).contains( "U" ) ) { + return false; + } + if ( i_m_all3.getState( "cd", "0" ).size() != 2 ) { + return false; + } + if ( !i_m_all3.getState( "cd", "0" ).contains( "G" ) ) { + return false; + } + if ( !i_m_all3.getState( "cd", "0" ).contains( "U" ) ) { + return false; + } + if ( i_m_all3.getState( "abcde", "0" ).size() != 1 ) { + return false; + } + if ( !i_m_all3.getState( "abcde", "0" ).contains( "U" ) ) { + return false; + } + if ( i_m_all3.getState( "r", "0" ).size() != 2 ) { + return false; + } + if ( !i_m_all3.getState( "r", "0" ).contains( "A" ) ) { + return false; + } + if ( !i_m_all3.getState( "r", "0" ).contains( "U" ) ) { + return false; + } + final FitchParsimony fitch4 = new FitchParsimony(); + final PhylogenyFactory factory4 = ParserBasedPhylogenyFactory.getInstance(); + final String p4_str = "(((a,b)ab,((c,d)cd,e)cde)abcde,f)r"; + final Phylogeny p4 = factory4.create( p4_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m4 = new BasicCharacterStateMatrix( 6, 1 ); + m4.setIdentifier( 0, "a" ); + m4.setIdentifier( 1, "b" ); + m4.setIdentifier( 2, "c" ); + m4.setIdentifier( 3, "d" ); + m4.setIdentifier( 4, "e" ); + m4.setIdentifier( 5, "f" ); + m4.setCharacter( 0, "0" ); + m4.setState( "a", "0", PRESENT ); + m4.setState( "b", "0", ABSENT ); + m4.setState( "c", "0", PRESENT ); + m4.setState( "d", "0", PRESENT ); + m4.setState( "e", "0", ABSENT ); + m4.setState( "f", "0", ABSENT ); + fitch4.setReturnInternalStates( true ); + fitch4.setReturnGainLossMatrix( true ); + fitch4.execute( p4, m4 ); + final CharacterStateMatrix gl_m_4 = fitch4.getGainLossMatrix(); + if ( fitch4.getCost() != 2 ) { + return false; + } + if ( fitch4.getTotalLosses() != 0 ) { + return false; + } + if ( fitch4.getTotalGains() != 2 ) { + return false; + } + if ( fitch4.getTotalUnchanged() != 9 ) { + return false; + } + if ( gl_m_4.getState( "a", "0" ) != GAIN ) { + return false; + } + if ( gl_m_4.getState( "b", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_4.getState( "ab", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_4.getState( "cd", "0" ) != GAIN ) { + return false; + } + if ( gl_m_4.getState( "r", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + final FitchParsimony fitch5 = new FitchParsimony(); + final PhylogenyFactory factory5 = ParserBasedPhylogenyFactory.getInstance(); + final String p5_str = "(((a,b)ab,((c,d)cd,e)cde)abcde,f)r"; + final Phylogeny p5 = factory5.create( p5_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m5 = new BasicCharacterStateMatrix( 6, 1 ); + m5.setIdentifier( 0, "a" ); + m5.setIdentifier( 1, "b" ); + m5.setIdentifier( 2, "c" ); + m5.setIdentifier( 3, "d" ); + m5.setIdentifier( 4, "e" ); + m5.setIdentifier( 5, "f" ); + m5.setCharacter( 0, "0" ); + m5.setState( "a", "0", PRESENT ); + m5.setState( "b", "0", ABSENT ); + m5.setState( "c", "0", PRESENT ); + m5.setState( "d", "0", ABSENT ); + m5.setState( "e", "0", PRESENT ); + m5.setState( "f", "0", ABSENT ); + fitch5.setReturnInternalStates( true ); + fitch5.setReturnGainLossMatrix( true ); + fitch5.execute( p5, m5 ); + final CharacterStateMatrix gl_m_5 = fitch5.getGainLossMatrix(); + if ( fitch5.getCost() != 3 ) { + return false; + } + if ( fitch5.getTotalLosses() != 2 ) { + return false; + } + if ( fitch5.getTotalGains() != 1 ) { + return false; + } + if ( fitch5.getTotalUnchanged() != 8 ) { + return false; + } + if ( gl_m_5.getState( "abcde", "0" ) != GAIN ) { + return false; + } + if ( gl_m_5.getState( "a", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_5.getState( "b", "0" ) != LOSS ) { + return false; + } + if ( gl_m_5.getState( "d", "0" ) != LOSS ) { + return false; + } + if ( gl_m_5.getState( "r", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + final FitchParsimony fitch6 = new FitchParsimony(); + final PhylogenyFactory factory6 = ParserBasedPhylogenyFactory.getInstance(); + final String p6_str = "(((a,b)ab,((c,d)cd,e)cde)abcde,f)r"; + final Phylogeny p6 = factory6.create( p6_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m6 = new BasicCharacterStateMatrix( 6, 1 ); + m6.setIdentifier( 0, "a" ); + m6.setIdentifier( 1, "b" ); + m6.setIdentifier( 2, "c" ); + m6.setIdentifier( 3, "d" ); + m6.setIdentifier( 4, "e" ); + m6.setIdentifier( 5, "f" ); + m6.setCharacter( 0, "0" ); + m6.setState( "a", "0", PRESENT ); + m6.setState( "b", "0", ABSENT ); + m6.setState( "c", "0", PRESENT ); + m6.setState( "d", "0", PRESENT ); + m6.setState( "e", "0", ABSENT ); + m6.setState( "f", "0", PRESENT ); + fitch6.setReturnInternalStates( true ); + fitch6.setReturnGainLossMatrix( true ); + fitch6.execute( p6, m6 ); + final CharacterStateMatrix gl_m_6 = fitch6.getGainLossMatrix(); + if ( fitch6.getCost() != 2 ) { + return false; + } + if ( fitch6.getTotalLosses() != 2 ) { + return false; + } + if ( fitch6.getTotalGains() != 0 ) { + return false; + } + if ( fitch6.getTotalUnchanged() != 9 ) { + return false; + } + if ( gl_m_6.getState( "abcde", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_6.getState( "r", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_6.getState( "b", "0" ) != LOSS ) { + return false; + } + if ( gl_m_6.getState( "e", "0" ) != LOSS ) { + return false; + } + final FitchParsimony fitch7 = new FitchParsimony(); + final PhylogenyFactory factory7 = ParserBasedPhylogenyFactory.getInstance(); + final String p7_str = "(((a,b)ab,(c,d)cd)abcd,((e,f)ef,(g,h)gh)efgh)r"; + final Phylogeny p7 = factory7.create( p7_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m7 = new BasicCharacterStateMatrix( 8, 1 ); + m7.setIdentifier( 0, "a" ); + m7.setIdentifier( 1, "b" ); + m7.setIdentifier( 2, "c" ); + m7.setIdentifier( 3, "d" ); + m7.setIdentifier( 4, "e" ); + m7.setIdentifier( 5, "f" ); + m7.setIdentifier( 6, "g" ); + m7.setIdentifier( 7, "h" ); + m7.setCharacter( 0, "0" ); + m7.setState( "a", "0", PRESENT ); + m7.setState( "b", "0", ABSENT ); + m7.setState( "c", "0", PRESENT ); + m7.setState( "d", "0", ABSENT ); + m7.setState( "e", "0", PRESENT ); + m7.setState( "f", "0", ABSENT ); + m7.setState( "g", "0", PRESENT ); + m7.setState( "h", "0", ABSENT ); + fitch7.setReturnInternalStates( true ); + fitch7.setReturnGainLossMatrix( true ); + fitch7.execute( p7, m7 ); + final CharacterStateMatrix gl_m_7 = fitch7.getGainLossMatrix(); + if ( fitch7.getCost() != 4 ) { + return false; + } + if ( fitch7.getTotalLosses() != 0 ) { + return false; + } + if ( fitch7.getTotalGains() != 4 ) { + return false; + } + if ( fitch7.getTotalUnchanged() != 11 ) { + return false; + } + if ( gl_m_7.getState( "a", "0" ) != GAIN ) { + return false; + } + if ( gl_m_7.getState( "c", "0" ) != GAIN ) { + return false; + } + if ( gl_m_7.getState( "e", "0" ) != GAIN ) { + return false; + } + if ( gl_m_7.getState( "g", "0" ) != GAIN ) { + return false; + } + if ( gl_m_7.getState( "r", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + fitch7.setReturnInternalStates( true ); + fitch7.setReturnGainLossMatrix( true ); + fitch7.setUseLast( true ); + fitch7.execute( p7, m7 ); + final CharacterStateMatrix gl_m_71 = fitch7.getGainLossMatrix(); + if ( fitch7.getCost() != 4 ) { + return false; + } + if ( fitch7.getTotalLosses() != 4 ) { + return false; + } + if ( fitch7.getTotalGains() != 0 ) { + return false; + } + if ( fitch7.getTotalUnchanged() != 11 ) { + return false; + } + if ( gl_m_71.getState( "b", "0" ) != LOSS ) { + return false; + } + if ( gl_m_71.getState( "d", "0" ) != LOSS ) { + return false; + } + if ( gl_m_71.getState( "f", "0" ) != LOSS ) { + return false; + } + if ( gl_m_71.getState( "h", "0" ) != LOSS ) { + return false; + } + if ( gl_m_71.getState( "r", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + final FitchParsimony fitch8 = new FitchParsimony(); + final PhylogenyFactory factory8 = ParserBasedPhylogenyFactory.getInstance(); + final String p8_str = "(((a,b)ab,(c,d)cd)abcd,((e,f)ef,(g,h)gh)efgh)r"; + final Phylogeny p8 = factory8.create( p8_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m8 = new BasicCharacterStateMatrix( 8, 1 ); + m8.setIdentifier( 0, "a" ); + m8.setIdentifier( 1, "b" ); + m8.setIdentifier( 2, "c" ); + m8.setIdentifier( 3, "d" ); + m8.setIdentifier( 4, "e" ); + m8.setIdentifier( 5, "f" ); + m8.setIdentifier( 6, "g" ); + m8.setIdentifier( 7, "h" ); + m8.setCharacter( 0, "0" ); + m8.setState( "a", "0", PRESENT ); + m8.setState( "b", "0", PRESENT ); + m8.setState( "c", "0", PRESENT ); + m8.setState( "d", "0", ABSENT ); + m8.setState( "e", "0", ABSENT ); + m8.setState( "f", "0", ABSENT ); + m8.setState( "g", "0", ABSENT ); + m8.setState( "h", "0", ABSENT ); + fitch8.setReturnInternalStates( true ); + fitch8.setReturnGainLossMatrix( true ); + fitch8.execute( p8, m8 ); + final CharacterStateMatrix gl_m_8 = fitch8.getGainLossMatrix(); + if ( fitch8.getCost() != 2 ) { + return false; + } + if ( fitch8.getTotalLosses() != 1 ) { + return false; + } + if ( fitch8.getTotalGains() != 1 ) { + return false; + } + if ( fitch8.getTotalUnchanged() != 13 ) { + return false; + } + if ( gl_m_8.getState( "d", "0" ) != LOSS ) { + return false; + } + if ( gl_m_8.getState( "abcd", "0" ) != GAIN ) { + return false; + } + final FitchParsimony fitch9 = new FitchParsimony(); + final PhylogenyFactory factory9 = ParserBasedPhylogenyFactory.getInstance(); + final String p9_str = "(((a,b)ab,c)abc,d)abcd"; + final Phylogeny p9 = factory9.create( p9_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m9 = new BasicCharacterStateMatrix( 4, 1 ); + m9.setIdentifier( 0, "a" ); + m9.setIdentifier( 1, "b" ); + m9.setIdentifier( 2, "c" ); + m9.setIdentifier( 3, "d" ); + m9.setCharacter( 0, "0" ); + m9.setState( "a", "0", PRESENT ); + m9.setState( "b", "0", ABSENT ); + m9.setState( "c", "0", PRESENT ); + m9.setState( "d", "0", ABSENT ); + fitch9.setReturnInternalStates( true ); + fitch9.setReturnGainLossMatrix( true ); + fitch9.setUseLast( false ); + fitch9.execute( p9, m9 ); + final CharacterStateMatrix gl_m_9a = fitch9.getGainLossMatrix(); + if ( fitch9.getCost() != 2 ) { + return false; + } + if ( fitch9.getTotalLosses() != 1 ) { + return false; + } + if ( fitch9.getTotalGains() != 1 ) { + return false; + } + if ( fitch9.getTotalUnchanged() != 5 ) { + return false; + } + if ( gl_m_9a.getState( "a", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_9a.getState( "b", "0" ) != LOSS ) { + return false; + } + if ( gl_m_9a.getState( "c", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_9a.getState( "d", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_9a.getState( "ab", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_9a.getState( "abc", "0" ) != GAIN ) { + return false; + } + if ( gl_m_9a.getState( "abcd", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + fitch9.setUseLast( true ); + fitch9.execute( p9, m9 ); + final CharacterStateMatrix gl_m_9b = fitch9.getGainLossMatrix(); + if ( fitch9.getCost() != 2 ) { + return false; + } + if ( fitch9.getTotalLosses() != 2 ) { + return false; + } + if ( fitch9.getTotalGains() != 0 ) { + return false; + } + if ( fitch9.getTotalUnchanged() != 5 ) { + return false; + } + if ( gl_m_9b.getState( "a", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_9b.getState( "b", "0" ) != LOSS ) { + return false; + } + if ( gl_m_9b.getState( "c", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_9b.getState( "d", "0" ) != LOSS ) { + return false; + } + if ( gl_m_9b.getState( "ab", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_9b.getState( "abc", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + if ( gl_m_9b.getState( "abcd", "0" ) != UNCHANGED_PRESENT ) { + return false; + } + fitch9.setUseLast( false ); + fitch9.setRandomize( true ); + fitch9.setRandomNumberSeed( 8722445 ); + fitch9.execute( p9, m9 ); + fitch9.getGainLossMatrix(); + if ( fitch9.getCost() != 2 ) { + return false; + } + if ( fitch9.getTotalLosses() != 1 ) { + return false; + } + if ( fitch9.getTotalGains() != 1 ) { + return false; + } + if ( fitch9.getTotalUnchanged() != 5 ) { + return false; + } + final FitchParsimony fitch10 = new FitchParsimony(); + final PhylogenyFactory factory10 = ParserBasedPhylogenyFactory.getInstance(); + final String p10_str = "((((a,b)ab,c)abc,d)abcd,e)abcde"; + final Phylogeny p10 = factory10.create( p10_str, new NHXParser() )[ 0 ]; + final CharacterStateMatrix m10 = new BasicCharacterStateMatrix( 5, 1 ); + m10.setIdentifier( 0, "a" ); + m10.setIdentifier( 1, "b" ); + m10.setIdentifier( 2, "c" ); + m10.setIdentifier( 3, "d" ); + m10.setIdentifier( 4, "e" ); + m10.setCharacter( 0, "0" ); + m10.setState( "a", "0", PRESENT ); + m10.setState( "b", "0", ABSENT ); + m10.setState( "c", "0", ABSENT ); + m10.setState( "d", "0", PRESENT ); + m10.setState( "e", "0", ABSENT ); + fitch10.setReturnInternalStates( true ); + fitch10.setReturnGainLossMatrix( true ); + fitch10.setUseLast( false ); + fitch10.execute( p10, m10 ); + final CharacterStateMatrix gl_m_10a = fitch10.getGainLossMatrix(); + if ( fitch10.getCost() != 2 ) { + return false; + } + if ( fitch10.getTotalLosses() != 0 ) { + return false; + } + if ( fitch10.getTotalGains() != 2 ) { + return false; + } + if ( fitch10.getTotalUnchanged() != 7 ) { + return false; + } + if ( gl_m_10a.getState( "a", "0" ) != GAIN ) { + return false; + } + if ( gl_m_10a.getState( "b", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10a.getState( "c", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10a.getState( "d", "0" ) != GAIN ) { + return false; + } + if ( gl_m_10a.getState( "e", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10a.getState( "ab", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10a.getState( "abc", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10a.getState( "abcd", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10a.getState( "abcde", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + fitch10.setUseLast( true ); + fitch10.execute( p10, m10 ); + final CharacterStateMatrix gl_m_10b = fitch10.getGainLossMatrix(); + if ( fitch10.getCost() != 2 ) { + return false; + } + if ( fitch10.getTotalLosses() != 0 ) { + return false; + } + if ( fitch10.getTotalGains() != 2 ) { + return false; + } + if ( fitch10.getTotalUnchanged() != 7 ) { + return false; + } + if ( gl_m_10b.getState( "a", "0" ) != GAIN ) { + return false; + } + if ( gl_m_10b.getState( "b", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10b.getState( "c", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10b.getState( "d", "0" ) != GAIN ) { + return false; + } + if ( gl_m_10b.getState( "e", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10b.getState( "ab", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10b.getState( "abc", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10b.getState( "abcd", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + if ( gl_m_10b.getState( "abcde", "0" ) != UNCHANGED_ABSENT ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testNeighborJoining() { + try { + BasicSymmetricalDistanceMatrix m = new BasicSymmetricalDistanceMatrix( 6 ); + m.setRow( "5", 1 ); + m.setRow( "4 7", 2 ); + m.setRow( "7 10 7", 3 ); + m.setRow( "6 9 6 5", 4 ); + m.setRow( "8 11 8 9 8", 5 ); + m.setIdentifier( 0, "A" ); + m.setIdentifier( 1, "B" ); + m.setIdentifier( 2, "C" ); + m.setIdentifier( 3, "D" ); + m.setIdentifier( 4, "E" ); + m.setIdentifier( 5, "F" ); + final NeighborJoining nj = NeighborJoining.createInstance(); + nj.setVerbose( false ); + nj.execute( m ); + m = new BasicSymmetricalDistanceMatrix( 7 ); + m.setIdentifier( 0, "Bovine" ); + m.setIdentifier( 1, "Mouse" ); + m.setIdentifier( 2, "Gibbon" ); + m.setIdentifier( 3, "Orang" ); + m.setIdentifier( 4, "Gorilla" ); + m.setIdentifier( 5, "Chimp" ); + m.setIdentifier( 6, "Human" ); + m.setRow( "0.00000 1.68660 1.71980 1.66060 1.52430 1.60430 1.59050", 0 ); + m.setRow( "1.68660 0.00000 1.52320 1.48410 1.44650 1.43890 1.46290", 1 ); + m.setRow( "1.71980 1.52320 0.00000 0.71150 0.59580 0.61790 0.55830", 2 ); + m.setRow( "1.66060 1.48410 0.71150 0.00000 0.46310 0.50610 0.47100", 3 ); + m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 ); + m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 ); + m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 ); + nj.execute( m ); + m = new BasicSymmetricalDistanceMatrix( 4 ); + m.setIdentifier( 0, "A" ); + m.setIdentifier( 1, "B" ); + m.setIdentifier( 2, "C" ); + m.setIdentifier( 3, "D" ); + m.setRow( "0.00 0.95 0.17 0.98", 0 ); + m.setRow( "0.95 0.00 1.02 1.83", 1 ); + m.setRow( "0.17 1.02 0.00 1.01", 2 ); + m.setRow( "0.98 1.83 1.01 0.00", 3 ); + final Phylogeny p3 = nj.execute( m ); + // + // -- A 0.05 + // - |0.01 + // ----------------------- B 0.90 + // + // --- C 0.10 + // - |0.01 + // ------------------------- D 0.91 + p3.reRoot( p3.getNode( "C" ).getParent() ); + if ( !isEqual( p3.getNode( "A" ).getDistanceToParent(), 0.05 ) ) { + return false; + } + if ( !isEqual( p3.getNode( "B" ).getDistanceToParent(), 0.90 ) ) { + return false; + } + if ( !isEqual( p3.getNode( "C" ).getDistanceToParent(), 0.10 ) ) { + return false; + } + if ( !isEqual( p3.getNode( "D" ).getDistanceToParent(), 0.91 ) ) { + return false; + } + if ( TIME ) { + timeNeighborJoining(); + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static boolean testSymmetricalDistanceMatrixParser() { + try { + final String l = ForesterUtil.getLineSeparator(); + StringBuffer source = new StringBuffer(); + source.append( " 4" + l ); + source.append( "A 0 0 0 0" + l ); + source.append( "B 1 0 0 0" + l ); + source.append( "C 2 4 0 0" + l ); + source.append( "D 3 5 6 0" + l ); + source.append( l ); + source.append( " 4" + l ); + source.append( "A 0 11 12 13" + l ); + source.append( "B 11 0 14 15" + l ); + source.append( "C 12 14 0 16" + l ); + source.append( "D 13 15 16 0" + l ); + source.append( l ); + source.append( l ); + source.append( " " + l ); + source.append( " 4" + l ); + source.append( " A 0 " + l ); + source.append( " B 21 0" + l ); + source.append( " C 22 24 0 " + l ); + source.append( " # 2 222 2 2 " + l ); + source.append( " D 23 25 26 0" + l ); + source.append( l ); + source.append( l ); + source.append( " " + l ); + final SymmetricalDistanceMatrixParser p0 = SymmetricalDistanceMatrixParser.createInstance(); + final DistanceMatrix[] ma0 = p0.parse( source.toString() ); + if ( ma0.length != 3 ) { + return false; + } + if ( !isEqual( ma0[ 0 ].getValue( 0, 0 ), 0 ) ) { + return false; + } + if ( !isEqual( ma0[ 0 ].getValue( 1, 0 ), 1 ) ) { + return false; + } + if ( !isEqual( ma0[ 0 ].getValue( 2, 0 ), 2 ) ) { + return false; + } + if ( !isEqual( ma0[ 0 ].getValue( 3, 0 ), 3 ) ) { + return false; + } + if ( !isEqual( ma0[ 0 ].getValue( 0, 1 ), 1 ) ) { + return false; + } + if ( !isEqual( ma0[ 0 ].getValue( 1, 1 ), 0 ) ) { + return false; + } + if ( !isEqual( ma0[ 0 ].getValue( 2, 1 ), 4 ) ) { + return false; + } + if ( !isEqual( ma0[ 0 ].getValue( 3, 1 ), 5 ) ) { + return false; + } + if ( !isEqual( ma0[ 1 ].getValue( 0, 0 ), 0 ) ) { + return false; + } + if ( !isEqual( ma0[ 1 ].getValue( 1, 0 ), 11 ) ) { + return false; + } + if ( !isEqual( ma0[ 1 ].getValue( 2, 0 ), 12 ) ) { + return false; + } + if ( !isEqual( ma0[ 1 ].getValue( 3, 0 ), 13 ) ) { + return false; + } + if ( !isEqual( ma0[ 1 ].getValue( 0, 1 ), 11 ) ) { + return false; + } + if ( !isEqual( ma0[ 1 ].getValue( 1, 1 ), 0 ) ) { + return false; + } + if ( !isEqual( ma0[ 1 ].getValue( 2, 1 ), 14 ) ) { + return false; + } + if ( !isEqual( ma0[ 1 ].getValue( 3, 1 ), 15 ) ) { + return false; + } + if ( !isEqual( ma0[ 2 ].getValue( 0, 0 ), 0 ) ) { + return false; + } + if ( !isEqual( ma0[ 2 ].getValue( 1, 0 ), 21 ) ) { + return false; + } + if ( !isEqual( ma0[ 2 ].getValue( 2, 0 ), 22 ) ) { + return false; + } + if ( !isEqual( ma0[ 2 ].getValue( 3, 0 ), 23 ) ) { + return false; + } + if ( !isEqual( ma0[ 2 ].getValue( 0, 1 ), 21 ) ) { + return false; + } + if ( !isEqual( ma0[ 2 ].getValue( 1, 1 ), 0 ) ) { + return false; + } + if ( !isEqual( ma0[ 2 ].getValue( 2, 1 ), 24 ) ) { + return false; + } + if ( !isEqual( ma0[ 2 ].getValue( 3, 1 ), 25 ) ) { + return false; + } + source = new StringBuffer(); + source.append( "A 0 0 0 0" + l ); + source.append( "B 1 0 0 0" + l ); + source.append( "C 2 4 0 0" + l ); + source.append( "D 3 5 6 0" + l ); + source.append( " " + l ); + source.append( "A 0 11 12 13" + l ); + source.append( "B 11 0 14 15" + l ); + source.append( "C 12 14 0 16" + l ); + source.append( "D 13 15 16 0" + l ); + source.append( l ); + source.append( " A 0 " + l ); + source.append( " B 21 0" + l ); + source.append( " C 22 24 0 " + l ); + source.append( " # 2 222 2 2 " + l ); + source.append( " D 23 25 26 0" + l ); + final DistanceMatrix[] ma1 = p0.parse( source.toString() ); + if ( ma1.length != 3 ) { + return false; + } + if ( !isEqual( ma1[ 0 ].getValue( 0, 0 ), 0 ) ) { + return false; + } + if ( !isEqual( ma1[ 0 ].getValue( 1, 0 ), 1 ) ) { + return false; + } + if ( !isEqual( ma1[ 0 ].getValue( 2, 0 ), 2 ) ) { + return false; + } + if ( !isEqual( ma1[ 0 ].getValue( 3, 0 ), 3 ) ) { + return false; + } + if ( !isEqual( ma1[ 0 ].getValue( 0, 1 ), 1 ) ) { + return false; + } + if ( !isEqual( ma1[ 0 ].getValue( 1, 1 ), 0 ) ) { + return false; + } + if ( !isEqual( ma1[ 0 ].getValue( 2, 1 ), 4 ) ) { + return false; + } + if ( !isEqual( ma1[ 0 ].getValue( 3, 1 ), 5 ) ) { + return false; + } + if ( !isEqual( ma1[ 1 ].getValue( 0, 0 ), 0 ) ) { + return false; + } + if ( !isEqual( ma1[ 1 ].getValue( 1, 0 ), 11 ) ) { + return false; + } + if ( !isEqual( ma1[ 1 ].getValue( 2, 0 ), 12 ) ) { + return false; + } + if ( !isEqual( ma1[ 1 ].getValue( 3, 0 ), 13 ) ) { + return false; + } + if ( !isEqual( ma1[ 1 ].getValue( 0, 1 ), 11 ) ) { + return false; + } + if ( !isEqual( ma1[ 1 ].getValue( 1, 1 ), 0 ) ) { + return false; + } + if ( !isEqual( ma1[ 1 ].getValue( 2, 1 ), 14 ) ) { + return false; + } + if ( !isEqual( ma1[ 1 ].getValue( 3, 1 ), 15 ) ) { + return false; + } + if ( !isEqual( ma1[ 2 ].getValue( 0, 0 ), 0 ) ) { + return false; + } + if ( !isEqual( ma1[ 2 ].getValue( 1, 0 ), 21 ) ) { + return false; + } + if ( !isEqual( ma1[ 2 ].getValue( 2, 0 ), 22 ) ) { + return false; + } + if ( !isEqual( ma1[ 2 ].getValue( 3, 0 ), 23 ) ) { + return false; + } + if ( !isEqual( ma1[ 2 ].getValue( 0, 1 ), 21 ) ) { + return false; + } + if ( !isEqual( ma1[ 2 ].getValue( 1, 1 ), 0 ) ) { + return false; + } + if ( !isEqual( ma1[ 2 ].getValue( 2, 1 ), 24 ) ) { + return false; + } + if ( !isEqual( ma1[ 2 ].getValue( 3, 1 ), 25 ) ) { + return false; + } + source = new StringBuffer(); + source.append( "A 0" + l ); + source.append( "B 10 0" + l ); + final DistanceMatrix[] ma2 = p0.parse( source.toString() ); + if ( ma2.length != 1 ) { + return false; + } + if ( !isEqual( ma2[ 0 ].getValue( 0, 1 ), 10 ) ) { + return false; + } + source = new StringBuffer(); + source.append( " " + l ); + source.append( "#" + l ); + final DistanceMatrix[] ma3 = p0.parse( source.toString() ); + if ( ma3.length != 0 ) { + return false; + } + source = new StringBuffer(); + source.append( " " + l ); + source.append( "A 0 11 12 13" + l ); + source.append( "B 0 14 15" + l ); + source.append( "C 0 16" + l ); + source.append( "D 0" + l ); + source.append( l ); + source.append( "A 0 21 22 23" + l ); + source.append( "B 0 24 25" + l ); + source.append( "C 0 26" + l ); + source.append( "D 0" + l ); + p0.setInputMatrixType( SymmetricalDistanceMatrixParser.InputMatrixType.UPPER_TRIANGLE ); + final DistanceMatrix[] ma4 = p0.parse( source ); + if ( ma4.length != 2 ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 0, 0 ), 0 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 1, 0 ), 11 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 2, 0 ), 12 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 3, 0 ), 13 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 0, 1 ), 11 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 1, 1 ), 0 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 2, 1 ), 14 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 3, 1 ), 15 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 0, 2 ), 12 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 1, 2 ), 14 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 2, 2 ), 0 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 3, 2 ), 16 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 0, 3 ), 13 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 1, 3 ), 15 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 2, 3 ), 16 ) ) { + return false; + } + if ( !isEqual( ma4[ 0 ].getValue( 3, 3 ), 0 ) ) { + return false; + } + source = new StringBuffer(); + source.append( " 4 " + l ); + source.append( "A 0 11 12 13" + l ); + source.append( "B 0 14 15" + l ); + source.append( "C 0 16" + l ); + source.append( "D 0" + l ); + source.append( " 4" + l ); + source.append( "A 0 21 22 23" + l ); + source.append( "B 0 24 25" + l ); + source.append( "C 0 26" + l ); + source.append( "D 0" + l ); + source.append( " " + l ); + source.append( " 4" + l ); + source.append( "A 0 21 22 23" + l ); + source.append( "B 0 24 25" + l ); + source.append( "C 0 26" + l ); + source.append( "D 0" + l ); + source.append( l ); + source.append( "A 0 21 22 23" + l ); + source.append( "B 0 24 25" + l ); + source.append( "C 0 26" + l ); + source.append( "D 0" + l ); + p0.setInputMatrixType( SymmetricalDistanceMatrixParser.InputMatrixType.UPPER_TRIANGLE ); + final DistanceMatrix[] ma5 = p0.parse( source ); + if ( ma5.length != 4 ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 0, 0 ), 0 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 1, 0 ), 11 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 2, 0 ), 12 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 3, 0 ), 13 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 0, 1 ), 11 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 1, 1 ), 0 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 2, 1 ), 14 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 3, 1 ), 15 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 0, 2 ), 12 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 1, 2 ), 14 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 2, 2 ), 0 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 3, 2 ), 16 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 0, 3 ), 13 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 1, 3 ), 15 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 2, 3 ), 16 ) ) { + return false; + } + if ( !isEqual( ma5[ 0 ].getValue( 3, 3 ), 0 ) ) { + return false; + } + } + catch ( final Exception e ) { + e.printStackTrace( System.out ); + return false; + } + return true; + } + + private static void timeNeighborJoining() { + final NeighborJoining nj = NeighborJoining.createInstance(); + for( int n = 3; n <= 12; ++n ) { + final int x = ( int ) Math.pow( 2, n ); + final BasicSymmetricalDistanceMatrix mt = new BasicSymmetricalDistanceMatrix( x ); + mt.randomize( new Date().getTime() ); + final long start_time = new Date().getTime(); + nj.execute( mt ); + System.out.println( "Size: " + x + " -> " + ( new Date().getTime() - start_time ) + "ms." ); + } + } +} diff --git a/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java new file mode 100644 index 0000000..13f99e8 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/distance/NeighborJoining.java @@ -0,0 +1,227 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.distance; + +import java.util.ArrayList; +import java.util.List; + +import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; +import org.forester.evoinference.matrix.distance.DistanceMatrix; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.PhylogenyNodeI; +import org.forester.util.ForesterUtil; + +public class NeighborJoining { + + private final static boolean VERBOSE_DEFAULT = false; + private DistanceMatrix _d; + private DistanceMatrix _m; + private double[] _r; + private int _n; + private PhylogenyNodeI[] _external_nodes; + private int[] _mappings; + private boolean _verbose; + + public NeighborJoining() { + init(); + } + + private void calculateDistancesFromNewNode( final int otu1, final int otu2, final double d ) { + for( int i = 0; i < _n; ++i ) { + if ( ( i == otu1 ) || ( i == otu2 ) ) { + continue; + } + final double nd = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2; + setValueInD( nd, otu1, i ); + } + } + + private double calculateM( final int i, final int j ) { + return getValueFromD( i, j ) - ( _r[ i ] + _r[ j ] ) / ( _n - 2 ); + } + + private void calculateNetDivergences() { + for( int i = 0; i < _n; ++i ) { + double d = 0.0; + for( int n = 0; n < _n; ++n ) { + d += getValueFromD( i, n ); + } + _r[ i ] = d; + } + } + + public Phylogeny execute( final DistanceMatrix distance ) { + reset( distance ); + final Phylogeny phylogeny = new Phylogeny(); + while ( _n > 2 ) { + updateM(); + final int[] s = findMinimalDistance(); + final int otu1 = s[ 0 ]; + final int otu2 = s[ 1 ]; + // It is a condition that otu1 < otu2. + if ( otu1 > otu2 ) { + throw new AssertionError( "NJ code is faulty: otu1 > otu2" ); + } + final PhylogenyNodeI node = new PhylogenyNode(); + final double d = getValueFromD( otu1, otu2 ); + final double d1 = ( d / 2 ) + ( ( _r[ otu1 ] - _r[ otu2 ] ) / ( 2 * ( _n - 2 ) ) ); + final double d2 = d - d1; + getExternalPhylogenyNode( otu1 ).setDistanceToParent( d1 ); + getExternalPhylogenyNode( otu2 ).setDistanceToParent( d2 ); + node.addAsChild( getExternalPhylogenyNode( otu1 ) ); + node.addAsChild( getExternalPhylogenyNode( otu2 ) ); + if ( isVerbose() ) { + printProgress( otu1, otu2 ); + } + calculateDistancesFromNewNode( otu1, otu2, d ); + setExternalPhylogenyNode( node, otu1 ); + updateMappings( otu2 ); + --_n; + } + final double d = getValueFromD( 0, 1 ) / 2; + getExternalPhylogenyNode( 0 ).setDistanceToParent( d ); + getExternalPhylogenyNode( 1 ).setDistanceToParent( d ); + final PhylogenyNodeI root = new PhylogenyNode(); + root.addAsChild( getExternalPhylogenyNode( 0 ) ); + root.addAsChild( getExternalPhylogenyNode( 1 ) ); + if ( isVerbose() ) { + printProgress( 0, 1 ); + } + phylogeny.setRoot( ( PhylogenyNode ) root ); + phylogeny.setRooted( false ); + return phylogeny; + } + + public List execute( final List distances_list ) { + final List pl = new ArrayList(); + for( final DistanceMatrix distances : distances_list ) { + pl.add( execute( distances ) ); + } + return pl; + } + + private int[] findMinimalDistance() { + // if more than one minimal distances, always the first found is + // returned + // i could randomize this, so that any would be returned in a randomized + // fashion... + double minimum = Double.MAX_VALUE; + int otu_1 = -1; + int otu_2 = -1; + for( int j = 1; j < _n; ++j ) { + for( int i = 0; i < j; ++i ) { + if ( _m.getValue( i, j ) < minimum ) { + minimum = _m.getValue( i, j ); + otu_1 = i; + otu_2 = j; + } + } + } + return new int[] { otu_1, otu_2 }; + } + + private PhylogenyNodeI getExternalPhylogenyNode( final int i ) { + return _external_nodes[ _mappings[ i ] ]; + } + + private double getValueFromD( final int otu1, final int otu2 ) { + return _d.getValue( _mappings[ otu1 ], _mappings[ otu2 ] ); + } + + private void init() { + setVerbose( VERBOSE_DEFAULT ); + } + + private void initExternalNodes() { + _external_nodes = new PhylogenyNodeI[ _n ]; + for( int i = 0; i < _n; ++i ) { + _external_nodes[ i ] = new PhylogenyNode(); + final String id = _d.getIdentifier( i ); + if ( id != null ) { + _external_nodes[ i ].setName( id ); + } + else { + _external_nodes[ i ].setName( "" + i ); + } + _mappings[ i ] = i; + } + } + + private boolean isVerbose() { + return _verbose; + } + + private void printProgress( final int otu1, final int otu2 ) { + final PhylogenyNodeI n1 = getExternalPhylogenyNode( otu1 ); + final PhylogenyNodeI n2 = getExternalPhylogenyNode( otu2 ); + System.out.println( "Node " + ( ForesterUtil.isEmpty( n1.getName() ) ? n1.getId() : n1.getName() ) + " joins " + + ( ForesterUtil.isEmpty( n2.getName() ) ? n2.getId() : n2.getName() ) ); + } + + // only the values in the lower triangle are used. + // !matrix values will be changed! + private void reset( final DistanceMatrix distances ) { + _n = distances.getSize(); + _d = distances; + _m = new BasicSymmetricalDistanceMatrix( _n ); + _r = new double[ _n ]; + _mappings = new int[ _n ]; + initExternalNodes(); + } + + private void setExternalPhylogenyNode( final PhylogenyNodeI node, final int i ) { + _external_nodes[ _mappings[ i ] ] = node; + } + + private void setValueInD( final double d, final int otu1, final int otu2 ) { + _d.setValue( _mappings[ otu1 ], _mappings[ otu2 ], d ); + } + + public void setVerbose( final boolean verbose ) { + _verbose = verbose; + } + + private void updateM() { + calculateNetDivergences(); + for( int j = 1; j < _n; ++j ) { + for( int i = 0; i < j; ++i ) { + _m.setValue( i, j, calculateM( i, j ) ); + } + } + } + + // otu2 will, in effect, be "deleted" from the matrix. + private void updateMappings( final int otu2 ) { + for( int i = otu2; i < _mappings.length - 1; ++i ) { + _mappings[ i ] = _mappings[ i + 1 ]; + } + } + + public static NeighborJoining createInstance() { + return new NeighborJoining(); + } +} diff --git a/forester/java/src/org/forester/evoinference/distance/PairwiseDistanceCalculator.java b/forester/java/src/org/forester/evoinference/distance/PairwiseDistanceCalculator.java new file mode 100644 index 0000000..544ac02 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/distance/PairwiseDistanceCalculator.java @@ -0,0 +1,181 @@ +// $Id: +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.distance; + +import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix; +import org.forester.msa.Msa; +import org.forester.sequence.Sequence; + +public final class PairwiseDistanceCalculator { + + public static final double DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA = 10; // Felsenstein uses -1 + private static final char GAP = Sequence.GAP; + private final Msa _msa; + private final double _value_for_too_large_distance_for_kimura_formula; + + private PairwiseDistanceCalculator( final Msa msa, final double value_for_too_large_distance_for_kimura_formula ) { + _msa = msa; + _value_for_too_large_distance_for_kimura_formula = value_for_too_large_distance_for_kimura_formula; + } + + private double calcFractionalDissimilarity( final int row_1, final int row_2 ) { + final int length = _msa.getLength(); + int nd = 0; + int n = 0; + char aa_1; + char aa_2; + for( int col = 0; col < length; ++col ) { + aa_1 = _msa.getResidueAt( row_1, col ); + aa_2 = _msa.getResidueAt( row_2, col ); + if ( ( aa_1 != GAP ) && ( aa_2 != GAP ) ) { + if ( aa_1 != aa_2 ) { + nd++; + } + n++; + } + } + if ( n == 0 ) { + return 1; + } + return ( double ) nd / n; + } + + /** + * "Kimura Distance" + * Kimura, 1983 + * + * @param row_1 + * @param row_2 + * @return + */ + private double calcKimuraDistance( final int row_1, final int row_2 ) { + final double p = calcFractionalDissimilarity( row_1, row_2 ); + final double dp = 1 - p - ( 0.2 * p * p ); + if ( dp <= 0.0 ) { + return _value_for_too_large_distance_for_kimura_formula; + } + if ( dp == 1 ) { + return 0; // Too avoid -0. + } + return -Math.log( dp ); + } + + private double calcPoissonDistance( final int row_1, final int row_2 ) { + final double p = calcFractionalDissimilarity( row_1, row_2 ); + final double dp = 1 - p; + if ( dp <= 0.0 ) { + return _value_for_too_large_distance_for_kimura_formula; + } + if ( dp == 1 ) { + return 0; // Too avoid -0. + } + return -Math.log( dp ); + } + + private BasicSymmetricalDistanceMatrix calcKimuraDistances() { + final int s = _msa.getNumberOfSequences(); + final BasicSymmetricalDistanceMatrix d = new BasicSymmetricalDistanceMatrix( s ); + copyIdentifiers( s, d ); + calcKimuraDistances( s, d ); + return d; + } + + private BasicSymmetricalDistanceMatrix calcPoissonDistances() { + final int s = _msa.getNumberOfSequences(); + final BasicSymmetricalDistanceMatrix d = new BasicSymmetricalDistanceMatrix( s ); + copyIdentifiers( s, d ); + calcPoissonDistances( s, d ); + return d; + } + + private BasicSymmetricalDistanceMatrix calcFractionalDissimilarities() { + final int s = _msa.getNumberOfSequences(); + final BasicSymmetricalDistanceMatrix d = new BasicSymmetricalDistanceMatrix( s ); + copyIdentifiers( s, d ); + calcFractionalDissimilarities( s, d ); + return d; + } + + private void calcKimuraDistances( final int s, final BasicSymmetricalDistanceMatrix d ) { + for( int i = 1; i < s; i++ ) { + for( int j = 0; j < i; j++ ) { + d.setValue( i, j, calcKimuraDistance( i, j ) ); + } + } + } + + private void calcPoissonDistances( final int s, final BasicSymmetricalDistanceMatrix d ) { + for( int i = 1; i < s; i++ ) { + for( int j = 0; j < i; j++ ) { + d.setValue( i, j, calcPoissonDistance( i, j ) ); + } + } + } + + private void calcFractionalDissimilarities( final int s, final BasicSymmetricalDistanceMatrix d ) { + for( int i = 1; i < s; i++ ) { + for( int j = 0; j < i; j++ ) { + d.setValue( i, j, calcFractionalDissimilarity( i, j ) ); + } + } + } + + @Override + public Object clone() throws CloneNotSupportedException { + throw new CloneNotSupportedException(); + } + + private void copyIdentifiers( final int s, final BasicSymmetricalDistanceMatrix d ) { + for( int i = 0; i < s; i++ ) { + d.setIdentifier( i, ( String ) _msa.getIdentifier( i ) ); + } + } + + public static BasicSymmetricalDistanceMatrix calcFractionalDissimilarities( final Msa msa ) { + return new PairwiseDistanceCalculator( msa, DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA ) + .calcFractionalDissimilarities(); + } + + public static BasicSymmetricalDistanceMatrix calcPoissonDistances( final Msa msa ) { + return new PairwiseDistanceCalculator( msa, DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA ) + .calcPoissonDistances(); + } + + public static BasicSymmetricalDistanceMatrix calcKimuraDistances( final Msa msa ) { + return new PairwiseDistanceCalculator( msa, DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA ) + .calcKimuraDistances(); + } + + public static BasicSymmetricalDistanceMatrix calcKimuraDistances( final Msa msa, + final double value_for_too_large_distance_for_kimura_formula ) { + return new PairwiseDistanceCalculator( msa, value_for_too_large_distance_for_kimura_formula ) + .calcKimuraDistances(); + } + + public enum PWD_DISTANCE_METHOD { + KIMURA_DISTANCE, POISSON_DISTANCE, FRACTIONAL_DISSIMILARITY; + } +} diff --git a/forester/java/src/org/forester/evoinference/matrix/character/BasicCharacterStateMatrix.java b/forester/java/src/org/forester/evoinference/matrix/character/BasicCharacterStateMatrix.java new file mode 100644 index 0000000..6ecfb11 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/matrix/character/BasicCharacterStateMatrix.java @@ -0,0 +1,538 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.matrix.character; + +import java.io.IOException; +import java.io.Writer; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.forester.io.parsers.nexus.NexusConstants; +import org.forester.util.ForesterUtil; +import org.forester.util.IllegalFormatUseException; + +public class BasicCharacterStateMatrix implements CharacterStateMatrix { + + final Object[][] _states; + final String[] _identifiers; + final String[] _characters; + final Map _identifier_index_map; + final Map _character_index_map; + + public BasicCharacterStateMatrix( final int number_of_identifiers, final int number_of_characters ) { + _states = new Object[ number_of_identifiers ][ number_of_characters ]; + _identifiers = new String[ number_of_identifiers ]; + _characters = new String[ number_of_characters ]; + _identifier_index_map = new HashMap( number_of_identifiers ); + _character_index_map = new HashMap( number_of_characters ); + } + + public BasicCharacterStateMatrix( final int number_of_identifiers, + final int number_of_characters, + final S default_state ) { + this( number_of_identifiers, number_of_identifiers ); + for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) { + for( int character = 0; character < number_of_characters; ++character ) { + setState( identifier, character, default_state ); + } + } + } + + public BasicCharacterStateMatrix( final List> states ) { + if ( ( states == null ) || ( states.size() < 1 ) || ( states.get( 0 ) == null ) ) { + throw new IllegalArgumentException( "attempt to create character state matrix from empty list" ); + } + final int number_of_characters = states.get( 0 ).size(); + final int number_of_identifiers = states.size(); + _states = new Object[ number_of_identifiers ][ number_of_characters ]; + _identifiers = new String[ number_of_identifiers ]; + _characters = new String[ number_of_characters ]; + _identifier_index_map = new HashMap( number_of_identifiers ); + _character_index_map = new HashMap( number_of_characters ); + for( int identifier = 0; identifier < number_of_identifiers; ++identifier ) { + for( int character = 0; character < number_of_characters; ++character ) { + setState( identifier, character, states.get( identifier ).get( character ) ); + } + } + } + + public BasicCharacterStateMatrix( final S[][] states ) { + this( states.length, states[ 0 ].length ); + for( int identifier = 0; identifier < states.length; ++identifier ) { + for( int character = 0; character < states[ 0 ].length; ++character ) { + setState( identifier, character, states[ identifier ][ character ] ); + } + } + } + + @Override + public boolean containsCharacter( final String character ) { + return _character_index_map.containsKey( character ); + } + + @Override + public boolean containsIdentifier( final String identifier ) { + return _identifier_index_map.containsKey( identifier ); + } + + public CharacterStateMatrix copy() { + final CharacterStateMatrix new_matrix = new BasicCharacterStateMatrix( getNumberOfIdentifiers(), + getNumberOfCharacters() ); + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + if ( getCharacter( character ) != null ) { + new_matrix.setCharacter( character, getCharacter( character ) ); + } + } + for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { + if ( getIdentifier( identifier ) != null ) { + new_matrix.setIdentifier( identifier, getIdentifier( identifier ) ); + } + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + new_matrix.setState( identifier, character, getState( identifier, character ) ); + } + } + return new_matrix; + } + + @Override + public boolean equals( final Object o ) { + if ( this == o ) { + return true; + } + else if ( o == null ) { + throw new IllegalArgumentException( "attempt to check character state matrix equality to null" ); + } + else if ( o.getClass() != this.getClass() ) { + throw new IllegalArgumentException( "attempt to check character state matrix to " + o + " [" + o.getClass() + + "]" ); + } + else { + final CharacterStateMatrix other = ( CharacterStateMatrix ) o; + if ( ( getNumberOfIdentifiers() != other.getNumberOfIdentifiers() ) + || ( getNumberOfCharacters() != other.getNumberOfCharacters() ) ) { + } + for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + final S s = getState( identifier, character ); + final S os = other.getState( identifier, character ); + if ( s == os ) { + continue; + } + else if ( ( s == null ) && ( os != null ) ) { + return false; + } + else if ( ( s != null ) && ( os == null ) ) { + return false; + } + else if ( !s.equals( other.getState( identifier, character ) ) ) { + return false; + } + } + } + return true; + } + } + + @Override + public String getCharacter( final int character_index ) { + return _characters[ character_index ]; + } + + @Override + public int getCharacterIndex( final String character ) { + if ( !_character_index_map.containsKey( character ) ) { + throw new IllegalArgumentException( "character [" + character + "] not found" ); + } + return _character_index_map.get( character ); + } + + @Override + public String getIdentifier( final int identifier_index ) { + return _identifiers[ identifier_index ]; + } + + @Override + public int getIdentifierIndex( final String identifier ) { + if ( !_identifier_index_map.containsKey( identifier ) ) { + throw new IllegalArgumentException( "indentifier [" + identifier + "] not found" ); + } + return _identifier_index_map.get( identifier ); + } + + private int getLengthOfLongestState() { + int longest = 0; + for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + final S s = getState( identifier, character ); + if ( s != null ) { + final int l = getState( identifier, character ).toString().length(); + if ( l > longest ) { + longest = l; + } + } + } + } + return longest; + } + + @Override + public int getNumberOfCharacters() { + if ( !isEmpty() ) { + return _states[ 0 ].length; + } + else { + return 0; + } + } + + @Override + public int getNumberOfIdentifiers() { + return _states.length; + } + + @Override + public S getState( final int identifier_index, final int character_index ) { + return ( S ) _states[ identifier_index ][ character_index ]; + } + + @Override + public S getState( final String identifier, final int character_index ) { + if ( !containsIdentifier( identifier ) ) { + throw new IllegalArgumentException( "identifier [" + identifier + "] not found" ); + } + return getState( _identifier_index_map.get( identifier ), character_index ); + } + + @Override + public S getState( final String identifier, final String character ) { + if ( !containsIdentifier( identifier ) ) { + throw new IllegalArgumentException( "identifier [" + identifier + "] not found" ); + } + if ( !containsCharacter( character ) ) { + throw new IllegalArgumentException( "character [" + character + "] not found" ); + } + return getState( _identifier_index_map.get( identifier ), _character_index_map.get( character ) ); + } + + @Override + public boolean isEmpty() { + return getNumberOfIdentifiers() <= 0; + } + + public CharacterStateMatrix pivot() { + final CharacterStateMatrix new_matrix = new BasicCharacterStateMatrix( getNumberOfCharacters(), + getNumberOfIdentifiers() ); + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + if ( getCharacter( character ) != null ) { + new_matrix.setIdentifier( character, getCharacter( character ) ); + } + } + for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { + if ( getIdentifier( identifier ) != null ) { + new_matrix.setCharacter( identifier, getIdentifier( identifier ) ); + } + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + new_matrix.setState( character, identifier, getState( identifier, character ) ); + } + } + return new_matrix; + } + + @Override + public void setCharacter( final int character_index, final String character ) { + if ( character == null ) { + throw new IllegalArgumentException( "attempt to use null character" ); + } + _characters[ character_index ] = character; + if ( _character_index_map.containsKey( character ) ) { + throw new IllegalArgumentException( "character [" + character + "] is not unique" ); + } + _character_index_map.put( character, character_index ); + } + + @Override + public void setIdentifier( final int identifier_index, final String identifier ) { + if ( identifier == null ) { + throw new IllegalArgumentException( "attempt to use null identifier" ); + } + _identifiers[ identifier_index ] = identifier; + if ( _identifier_index_map.containsKey( identifier ) ) { + throw new IllegalArgumentException( "identifier [" + identifier + "] is not unique" ); + } + _identifier_index_map.put( identifier, identifier_index ); + } + + @Override + public void setState( final int identifier_index, final int character_index, final S state ) { + _states[ identifier_index ][ character_index ] = state; + } + + @Override + public void setState( final String identifier, final int character_index, final S state ) { + if ( !_identifier_index_map.containsKey( identifier ) ) { + throw new IllegalArgumentException( "identifier [" + identifier + "] not found" ); + } + setState( _identifier_index_map.get( identifier ), character_index, state ); + } + + @Override + public void setState( final String identifier, final String character, final S state ) { + if ( !containsIdentifier( identifier ) ) { + throw new IllegalArgumentException( "identifier [" + identifier + "] not found" ); + } + if ( !containsCharacter( character ) ) { + throw new IllegalArgumentException( "character [" + character + "] not found" ); + } + setState( _identifier_index_map.get( identifier ), _character_index_map.get( character ), state ); + } + + private void toForester( final Writer writer ) throws IOException { + final int longest = getLengthOfLongestState() + 5; + writer.write( "Identifiers: " ); + writer.write( String.valueOf( getNumberOfIdentifiers() ) ); + writer.write( ForesterUtil.LINE_SEPARATOR ); + writer.write( "Characters : " ); + writer.write( String.valueOf( getNumberOfCharacters() ) ); + writer.write( ForesterUtil.LINE_SEPARATOR ); + writer.write( ForesterUtil.pad( "", 20, ' ', false ).toString() ); + writer.write( ' ' ); + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + final String c = getCharacter( character ); + writer.write( c != null ? ForesterUtil.pad( c, longest, ' ', false ).toString() : ForesterUtil + .pad( "", longest, ' ', false ).toString() ); + if ( character < getNumberOfCharacters() - 1 ) { + writer.write( ' ' ); + } + } + writer.write( ForesterUtil.LINE_SEPARATOR ); + for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { + if ( getIdentifier( identifier ) != null ) { + writer.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() ); + writer.write( ' ' ); + } + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + final S state = getState( identifier, character ); + writer.write( state != null ? ForesterUtil.pad( state.toString(), longest, ' ', false ).toString() + : ForesterUtil.pad( "", longest, ' ', false ).toString() ); + if ( character < getNumberOfCharacters() - 1 ) { + writer.write( ' ' ); + } + } + if ( identifier < getNumberOfIdentifiers() - 1 ) { + writer.write( ForesterUtil.LINE_SEPARATOR ); + } + } + } + + private void toNexus( final Writer writer ) throws IOException { + if ( isEmpty() ) { + return; + } + writer.write( NexusConstants.NEXUS ); + writer.write( ForesterUtil.LINE_SEPARATOR ); + writeNexusTaxaBlock( writer ); + writeNexusBinaryChractersBlock( writer ); + } + + private void toPhylip( final Writer writer ) throws IOException { + final int pad = 6; + writer.write( ' ' ); + writer.write( ' ' ); + writer.write( ' ' ); + writer.write( ' ' ); + writer.write( getNumberOfIdentifiers() ); + writer.write( ' ' ); + writer.write( getNumberOfCharacters() ); + writer.write( ForesterUtil.LINE_SEPARATOR ); + for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { + if ( !ForesterUtil.isEmpty( getIdentifier( identifier ) ) ) { + writer.write( ForesterUtil.pad( getIdentifier( identifier ), pad, ' ', false ).toString() ); + writer.write( ' ' ); + writer.write( ' ' ); + } + else { + throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" ); + } + writer.write( "" ); + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + final String state = getState( identifier, character ).toString(); + writer.write( state != null ? ForesterUtil.pad( state, pad, ' ', false ).toString() : ForesterUtil + .pad( "", pad, ' ', false ).toString() ); + if ( character < getNumberOfCharacters() - 1 ) { + writer.write( ' ' ); + writer.write( ' ' ); + } + } + if ( identifier < getNumberOfIdentifiers() - 1 ) { + writer.write( ForesterUtil.LINE_SEPARATOR ); + } + } + } + + //TODO + //to format for microarray-style clustering + // states are ints in this case + //TODO + public void toWriter( final Writer writer ) throws IOException { + toForester( writer ); + } + + @Override + public void toWriter( final Writer writer, final Format format ) throws IOException { + switch ( format ) { + case PHYLIP: + toPhylip( writer ); + break; + case FORESTER: + toForester( writer ); + break; + case NEXUS_BINARY: + toNexus( writer ); + break; + default: + throw new IllegalArgumentException( "Unknown format:" + format ); + } + } + + public void writeNexusBinaryChractersBlock( final Writer w ) throws IOException { + //BEGIN CHARACTERS; + // DIMENSIONS NCHAR=x; + //BEGIN CHARSTATELABELS + // 1 bcl, + // 2 tir, + //END; + // FORMAT DATATYPE=STANDARD SYMBOLS=; + // MATRIX + // fish d d f + // frog s d f f + // snake x x x x; + // END; + w.write( NexusConstants.BEGIN_CHARACTERS ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( " " ); + w.write( NexusConstants.DIMENSIONS ); + w.write( " " ); + w.write( NexusConstants.NCHAR ); + w.write( "=" ); + w.write( String.valueOf( getNumberOfCharacters() ) ); + w.write( ";" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + writeNexusCharstatelabels( w ); + w.write( " " ); + w.write( NexusConstants.FORMAT ); + w.write( " " ); + w.write( NexusConstants.DATATYPE ); + w.write( "=" ); + w.write( NexusConstants.STANDARD ); + w.write( " " ); + w.write( NexusConstants.SYMBOLS ); + w.write( "=\"" ); + w.write( String.valueOf( BinaryStates.ABSENT ) ); + w.write( String.valueOf( BinaryStates.PRESENT ) ); + w.write( "\";" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + writeNexusMatrix( w ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( NexusConstants.END ); + w.write( ForesterUtil.LINE_SEPARATOR ); + } + + public void writeNexusCharstatelabels( final Writer w ) throws IOException { + w.write( " " ); + w.write( NexusConstants.CHARSTATELABELS ); + w.write( ForesterUtil.LINE_SEPARATOR ); + for( int i = 0; i < getNumberOfCharacters(); ++i ) { + w.write( " " + ( i + 1 ) + " '" ); + w.write( getCharacter( i ) ); + w.write( "'" ); + if ( i < getNumberOfCharacters() - 1 ) { + w.write( "," ); + w.write( ForesterUtil.LINE_SEPARATOR ); + } + } + w.write( ";" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + } + + public void writeNexusMatrix( final Writer w ) throws IOException { + w.write( " " ); + w.write( NexusConstants.MATRIX ); + w.write( ForesterUtil.LINE_SEPARATOR ); + for( int identifier = 0; identifier < getNumberOfIdentifiers(); ++identifier ) { + if ( getIdentifier( identifier ) != null ) { + w.write( " " ); + w.write( ForesterUtil.pad( getIdentifier( identifier ), 20, ' ', false ).toString() ); + w.write( ' ' ); + } + for( int character = 0; character < getNumberOfCharacters(); ++character ) { + final S state = getState( identifier, character ); + if ( state == null ) { + throw new IllegalFormatUseException( "character state matrix cannot contain null if to be represented in nexus format" ); + } + if ( !( state instanceof BinaryStates ) ) { + throw new IllegalFormatUseException( "nexus format representation expects binary character data - got [" + + getState( 0, 0 ).getClass() + "] instead" ); + } + if ( state == BinaryStates.UNKNOWN ) { + throw new IllegalFormatUseException( "character state matrix cannot contain unknown states if to be represented in nexus format" ); + } + w.write( state.toString() ); + } + if ( identifier < getNumberOfIdentifiers() - 1 ) { + w.write( ForesterUtil.LINE_SEPARATOR ); + } + } + w.write( ";" ); + } + + public void writeNexusTaxaBlock( final Writer w ) throws IOException { + //BEGIN TAXA; + // DIMENSIONS NTAX=n; + // TAXLABELS fish frog snake; + //END; + w.write( NexusConstants.BEGIN_TAXA ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( " " ); + w.write( NexusConstants.DIMENSIONS ); + w.write( " " ); + w.write( NexusConstants.NTAX ); + w.write( "=" ); + w.write( String.valueOf( getNumberOfIdentifiers() ) ); + w.write( ";" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( " " ); + w.write( NexusConstants.TAXLABELS ); + for( int i = 0; i < getNumberOfIdentifiers(); ++i ) { + w.write( " " ); + w.write( getIdentifier( i ) ); + } + w.write( ";" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + w.write( NexusConstants.END ); + w.write( ForesterUtil.LINE_SEPARATOR ); + } +} diff --git a/forester/java/src/org/forester/evoinference/matrix/character/CharacterStateMatrix.java b/forester/java/src/org/forester/evoinference/matrix/character/CharacterStateMatrix.java new file mode 100644 index 0000000..5467341 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/matrix/character/CharacterStateMatrix.java @@ -0,0 +1,138 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.matrix.character; + +import java.io.IOException; +import java.io.Writer; + +public interface CharacterStateMatrix { + + public boolean containsCharacter( final String character ); + + public boolean containsIdentifier( final String identifier ); + + public CharacterStateMatrix copy(); + + public String getCharacter( int character_index ); + + public int getCharacterIndex( final String character ); + + public String getIdentifier( int identifier_index ); + + public int getIdentifierIndex( final String identifier ); + + public int getNumberOfCharacters(); + + public int getNumberOfIdentifiers(); + + public S getState( final int identifier_index, final int character_index ); + + public S getState( final String identifier, final int character_index ); + + public S getState( final String identifier, final String character ); + + public boolean isEmpty(); + + public CharacterStateMatrix pivot(); + + public void setCharacter( int character_index, final String character ); + + public void setIdentifier( int identifier_index, final String identifier ); + + public void setState( int identifier_index, int character_index, final S state ); + + public void setState( final String identifier, int character_index, final S state ); + + public void setState( final String identifier, final String character, final S state ); + + public void toWriter( final Writer writer ) throws IOException; + + public void toWriter( final Writer writer, final Format format ) throws IOException; + + /** + * It is crucial that the order + * ABSENT, UNKNOWN, PRESENT not be changes since + * this determines the sort order. + * + */ + static public enum BinaryStates { + ABSENT, UNKNOWN, PRESENT; + + public char toChar() { + switch ( this ) { + case PRESENT: + return '1'; + case ABSENT: + return '0'; + case UNKNOWN: + return '?'; + } + throw new RuntimeException( "unknown state: " + this ); + } + + @Override + public String toString() { + switch ( this ) { + case PRESENT: + return "1"; + case ABSENT: + return "0"; + case UNKNOWN: + return "?"; + } + throw new RuntimeException( "unknown state: " + this ); + } + } + + public static enum Format { + PHYLIP, FORESTER, NEXUS_BINARY + } + + static public enum GainLossStates { + GAIN, LOSS, UNCHANGED_PRESENT, UNCHANGED_ABSENT, UNKNOWN; + + @Override + public String toString() { + switch ( this ) { + case GAIN: + return "+"; + case LOSS: + return "-"; + case UNCHANGED_PRESENT: + return "X"; + case UNCHANGED_ABSENT: + return "."; + case UNKNOWN: + return "?"; + } + throw new AssertionError( "unknown state: " + this ); + } + } + + static public enum NucleotideStates { + A, C, G, T, UNKNOWN; + } +} diff --git a/forester/java/src/org/forester/evoinference/matrix/character/DiscreteState.java b/forester/java/src/org/forester/evoinference/matrix/character/DiscreteState.java new file mode 100644 index 0000000..9094d18 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/matrix/character/DiscreteState.java @@ -0,0 +1,80 @@ +// $Id: +// +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.matrix.character; + +import org.forester.util.ForesterUtil; + +public class DiscreteState implements Comparable { + + final private String _id; + + public DiscreteState( final String id ) { + if ( ForesterUtil.isEmpty( id ) ) { + throw new IllegalArgumentException( "attempt to create new discrete state from empty or null string" ); + } + _id = id.trim(); + } + + @Override + public int compareTo( final DiscreteState discrete_state ) { + if ( this == discrete_state ) { + return 0; + } + return getId().compareTo( discrete_state.getId() ); + } + + @Override + public boolean equals( final Object o ) { + if ( this == o ) { + return true; + } + else if ( o == null ) { + throw new IllegalArgumentException( "attempt to check discrete state equality to null" ); + } + else if ( o.getClass() != this.getClass() ) { + throw new IllegalArgumentException( "attempt to check discrete stateequality to " + o + " [" + o.getClass() + + "]" ); + } + else { + return getId().equals( ( ( DiscreteState ) o ).getId() ); + } + } + + public String getId() { + return _id; + } + + @Override + public int hashCode() { + return getId().hashCode(); + } + + @Override + public String toString() { + return getId(); + } +} \ No newline at end of file diff --git a/forester/java/src/org/forester/evoinference/matrix/distance/BasicSymmetricalDistanceMatrix.java b/forester/java/src/org/forester/evoinference/matrix/distance/BasicSymmetricalDistanceMatrix.java new file mode 100644 index 0000000..ac17854 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/matrix/distance/BasicSymmetricalDistanceMatrix.java @@ -0,0 +1,183 @@ +// $Id: +// Exp $ +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.matrix.distance; + +import java.io.IOException; +import java.io.Writer; +import java.text.DecimalFormat; +import java.text.NumberFormat; +import java.util.StringTokenizer; + +import org.forester.util.ForesterUtil; +import org.forester.util.IllegalFormatUseException; + +public class BasicSymmetricalDistanceMatrix implements DistanceMatrix { + + NumberFormat nf1 = NumberFormat.getInstance(); + private final static NumberFormat PHYLIP_FORMATTER = new DecimalFormat( "0.000000" ); + final double[][] _values; + final String[] _identifiers; + + public BasicSymmetricalDistanceMatrix( final int size ) { + _values = new double[ size ][ size ]; + _identifiers = new String[ size ]; + } + + public String getIdentifier( final int i ) { + return _identifiers[ i ]; + } + + public int getIndex( final String identifier ) { + for( int i = 0; i < _identifiers.length; i++ ) { + if ( getIdentifier( i ).equals( identifier ) ) { + return i; + } + } + throw new IllegalArgumentException( "identifier [" + identifier + "] not found in distance matrix" ); + } + + public int getSize() { + return _values.length; + } + + public double getValue( final int col, final int row ) { + if ( col == row ) { + if ( col >= getSize() ) { + throw new IndexOutOfBoundsException( "" ); + } + return 0.0; + } + else if ( col > row ) { + return _values[ row ][ col ]; + } + return _values[ col ][ row ]; + } + + public void randomize( final long seed ) { + final java.util.Random r = new java.util.Random( seed ); + for( int j = 0; j < getSize(); ++j ) { + for( int i = 0; i < j; ++i ) { + setValue( i, j, r.nextDouble() ); + } + } + } + + public void setIdentifier( final int i, final String identifier ) { + _identifiers[ i ] = identifier; + } + + public void setRow( final String s, final int row ) { + final StringTokenizer tk = new StringTokenizer( s ); + int i = 0; + while ( tk.hasMoreElements() ) { + setValue( i, row, new Double( tk.nextToken() ).doubleValue() ); + i++; + } + } + + public void setValue( final int col, final int row, final double d ) { + if ( ( col == row ) && ( d != 0.0 ) ) { + throw new IllegalArgumentException( "attempt to set a non-zero value on the diagonal of a symmetrical distance matrix" ); + } + else if ( col > row ) { + _values[ row ][ col ] = d; + } + _values[ col ][ row ] = d; + } + + public void write( final Writer w ) throws IOException { + w.write( " " ); + w.write( getSize() + "" ); + w.write( ForesterUtil.LINE_SEPARATOR ); + for( int row = 0; row < getSize(); ++row ) { + if ( !ForesterUtil.isEmpty( getIdentifier( row ) ) ) { + w.write( ForesterUtil.pad( getIdentifier( row ), 10, ' ', false ).toString() ); + w.write( ' ' ); + w.write( ' ' ); + } + else { + throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" ); + } + for( int col = 0; col < getSize(); ++col ) { + w.write( PHYLIP_FORMATTER.format( getValue( col, row ) ) ); + if ( col < getSize() - 1 ) { + w.write( ' ' ); + w.write( ' ' ); + } + } + if ( row < getSize() - 1 ) { + w.write( ForesterUtil.LINE_SEPARATOR ); + } + } + } + + private StringBuffer toPhylip() { + final StringBuffer sb = new StringBuffer(); + sb.append( ' ' ); + sb.append( ' ' ); + sb.append( ' ' ); + sb.append( ' ' ); + sb.append( getSize() ); + sb.append( ForesterUtil.LINE_SEPARATOR ); + for( int row = 0; row < getSize(); ++row ) { + if ( !ForesterUtil.isEmpty( getIdentifier( row ) ) ) { + sb.append( ForesterUtil.pad( getIdentifier( row ), 10, ' ', false ) ); + sb.append( ' ' ); + sb.append( ' ' ); + } + else { + throw new IllegalFormatUseException( "Phylip format does not allow empty identifiers" ); + } + //sb.append( "" ); + for( int col = 0; col < getSize(); ++col ) { + sb.append( PHYLIP_FORMATTER.format( getValue( col, row ) ) ); + if ( col < getSize() - 1 ) { + sb.append( ' ' ); + sb.append( ' ' ); + } + } + if ( row < getSize() - 1 ) { + sb.append( ForesterUtil.LINE_SEPARATOR ); + } + } + return sb; + } + + @Override + public String toString() { + return toPhylip().toString(); + } + + public StringBuffer toStringBuffer( final Format format ) { + switch ( format ) { + case PHYLIP: + return toPhylip(); + default: + throw new IllegalArgumentException( "Unknown format:" + format ); + } + } +} diff --git a/forester/java/src/org/forester/evoinference/matrix/distance/DistanceMatrix.java b/forester/java/src/org/forester/evoinference/matrix/distance/DistanceMatrix.java new file mode 100644 index 0000000..74f8ff1 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/matrix/distance/DistanceMatrix.java @@ -0,0 +1,47 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.matrix.distance; + +public interface DistanceMatrix { + + public String getIdentifier( int i ); + + public int getIndex( String identifier ); + + public int getSize(); + + public double getValue( int col, int row ); + + public void setIdentifier( int i, final String identifier ); + + public void setValue( int col, int row, double distance ); + + public StringBuffer toStringBuffer( Format format ); + + public static enum Format { + PHYLIP + } +} diff --git a/forester/java/src/org/forester/evoinference/parsimony/DolloParsimony.java b/forester/java/src/org/forester/evoinference/parsimony/DolloParsimony.java new file mode 100644 index 0000000..a10e865 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/parsimony/DolloParsimony.java @@ -0,0 +1,396 @@ +// $Id: +// +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.parsimony; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; +import org.forester.evoinference.matrix.character.CharacterStateMatrix; +import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; +import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.ForesterUtil; + +public class DolloParsimony { + + final static private BinaryStates PRESENT = BinaryStates.PRESENT; + final static private BinaryStates ABSENT = BinaryStates.ABSENT; + final static private BinaryStates UNKNOWN = BinaryStates.UNKNOWN; + final static private GainLossStates LOSS = GainLossStates.LOSS; + final static private GainLossStates GAIN = GainLossStates.GAIN; + final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; + final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT; + private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false; + private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false; + private boolean _return_internal_states = false; + private boolean _return_gain_loss = false; + private int _total_gains; + private int _total_losses; + private int _total_unchanged; + private CharacterStateMatrix _internal_states_matrix; + private CharacterStateMatrix _gain_loss_matrix; + + private DolloParsimony() { + init(); + } + + public void execute( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix ) { + if ( !p.isRooted() ) { + throw new IllegalArgumentException( "attempt to execute Dollo parsimony on unroored phylogeny" ); + } + if ( external_node_states_matrix.isEmpty() ) { + throw new IllegalArgumentException( "character matrix is empty" ); + } + if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) { + throw new IllegalArgumentException( "number of external nodes in phylogeny [" + + p.getNumberOfExternalNodes() + "] and number of indentifiers [" + + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" ); + } + reset(); + if ( isReturnInternalStates() ) { + initializeInternalStates( p, external_node_states_matrix ); + } + if ( isReturnGainLossMatrix() ) { + initializeGainLossMatrix( p, external_node_states_matrix ); + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + executeForOneCharacter( p, + getStatesForCharacter( p, external_node_states_matrix, character_index ), + character_index ); + } + if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains() + + getTotalLosses() + getTotalUnchanged() ) ) { + throw new AssertionError( "this should not have happened: something is deeply wrong with Dollo parsimony implementation" ); + } + } + + private void executeForOneCharacter( final Phylogeny p, + final Map states, + final int character_state_column ) { + postOrderTraversal( p, states ); + preOrderTraversal( p, states, character_state_column ); + } + + /* (non-Javadoc) + * @see org.forester.phylogenyinference.Parsimony#getCost() + */ + public int getCost() { + return getTotalGains() + getTotalLosses(); + } + + /* (non-Javadoc) + * @see org.forester.phylogenyinference.Parsimony#getGainLossMatrix() + */ + public CharacterStateMatrix getGainLossMatrix() { + if ( !isReturnGainLossMatrix() ) { + throw new RuntimeException( "creation of gain-loss matrix has not been enabled" ); + } + return _gain_loss_matrix; + } + + /* (non-Javadoc) + * @see org.forester.phylogenyinference.Parsimony#getInternalStatesMatrix() + */ + public CharacterStateMatrix getInternalStatesMatrix() { + if ( !isReturnInternalStates() ) { + throw new RuntimeException( "creation of internal state matrix has not been enabled" ); + } + return _internal_states_matrix; + } + + private Map getStatesForCharacter( final Phylogeny p, + final CharacterStateMatrix matrix, + final int character_index ) { + final Map states = new HashMap( matrix + .getNumberOfIdentifiers() ); + for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) { + final BinaryStates state = matrix.getState( indentifier_index, character_index ); + if ( state == null ) { + throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index + + "] is null" ); + } + states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state ); + } + return states; + } + + /* (non-Javadoc) + * @see org.forester.phylogenyinference.Parsimony#getTotalGains() + */ + public int getTotalGains() { + return _total_gains; + } + + /* (non-Javadoc) + * @see org.forester.phylogenyinference.Parsimony#getTotalLosses() + */ + public int getTotalLosses() { + return _total_losses; + } + + /* (non-Javadoc) + * @see org.forester.phylogenyinference.Parsimony#getTotalUnchanged() + */ + public int getTotalUnchanged() { + return _total_unchanged; + } + + private void init() { + setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT ); + setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT ); + reset(); + } + + private void initializeGainLossMatrix( final Phylogeny p, + final CharacterStateMatrix external_node_states_matrix ) { + final List nodes = new ArrayList(); + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + nodes.add( postorder.next() ); + } + setGainLossMatrix( new BasicCharacterStateMatrix( nodes.size(), + external_node_states_matrix + .getNumberOfCharacters() ) ); + int identifier_index = 0; + for( final PhylogenyNode node : nodes ) { + getGainLossMatrix().setIdentifier( identifier_index++, + ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node + .getName() ); + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + getGainLossMatrix().setCharacter( character_index, + external_node_states_matrix.getCharacter( character_index ) ); + } + } + + private void initializeInternalStates( final Phylogeny p, + final CharacterStateMatrix external_node_states_matrix ) { + final List internal_nodes = new ArrayList(); + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + final PhylogenyNode node = postorder.next(); + if ( node.isInternal() ) { + internal_nodes.add( node ); + } + } + setInternalStatesMatrix( new BasicCharacterStateMatrix( internal_nodes.size(), + external_node_states_matrix + .getNumberOfCharacters() ) ); + int identifier_index = 0; + for( final PhylogenyNode node : internal_nodes ) { + getInternalStatesMatrix().setIdentifier( identifier_index++, + ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node + .getName() ); + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + getInternalStatesMatrix().setCharacter( character_index, + external_node_states_matrix.getCharacter( character_index ) ); + } + } + + private boolean isReturnGainLossMatrix() { + return _return_gain_loss; + } + + private boolean isReturnInternalStates() { + return _return_internal_states; + } + + private void postOrderTraversal( final Phylogeny p, final Map states ) + throws AssertionError { + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + final PhylogenyNode node = postorder.next(); + if ( !node.isExternal() ) { + final int present_unknown = getNumberOfChildNodesWithPresentOrUnknownState( states, node ); + if ( present_unknown < 1 ) { + states.put( node, ABSENT ); + } + else if ( present_unknown == 1 ) { + states.put( node, UNKNOWN ); + } + else { + states.put( node, PRESENT ); + } + } + } + } + + private void preOrderTraversal( final Phylogeny p, + final Map states, + final int character_state_column ) throws AssertionError { + boolean gain = false; + for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) { + final PhylogenyNode node = preorder.next(); + BinaryStates parent_state = null; + if ( !node.isRoot() ) { + parent_state = states.get( node.getParent() ); + } + if ( !node.isExternal() ) { + if ( states.get( node ) == UNKNOWN ) { + if ( parent_state == PRESENT ) { + states.put( node, PRESENT ); + } + else { + if ( isCharacterPresentOrUnknownInAtLeastTwoChildNodes( states, node ) ) { + states.put( node, PRESENT ); + } + else { + states.put( node, ABSENT ); + } + } + } + if ( isReturnInternalStates() ) { + setInternalNodeState( states, character_state_column, node ); + } + } + final BinaryStates current_state = states.get( node ); + if ( ( parent_state == PRESENT ) && ( current_state == ABSENT ) ) { + ++_total_losses; + if ( isReturnGainLossMatrix() ) { + setGainLossState( character_state_column, node, LOSS ); + } + } + else if ( ( ( parent_state == ABSENT ) ) && ( current_state == PRESENT ) ) { + if ( gain ) { + throw new RuntimeException( "this should not have happened: dollo parsimony cannot have more than one gain" ); + } + gain = true; + ++_total_gains; + if ( isReturnGainLossMatrix() ) { + setGainLossState( character_state_column, node, GAIN ); + } + } + else { + ++_total_unchanged; + if ( isReturnGainLossMatrix() ) { + if ( current_state == PRESENT ) { + setGainLossState( character_state_column, node, UNCHANGED_PRESENT ); + } + else if ( current_state == ABSENT ) { + setGainLossState( character_state_column, node, UNCHANGED_ABSENT ); + } + } + } + } + } + + private void reset() { + setTotalLosses( 0 ); + setTotalGains( 0 ); + setTotalUnchanged( 0 ); + } + + private void setGainLossMatrix( final CharacterStateMatrix gain_loss_matrix ) { + _gain_loss_matrix = gain_loss_matrix; + } + + private void setGainLossState( final int character_state_column, + final PhylogenyNode node, + final GainLossStates state ) { + getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), + character_state_column, + state ); + } + + private void setInternalNodeState( final Map states, + final int character_state_column, + final PhylogenyNode node ) { + getInternalStatesMatrix() + .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), + character_state_column, + states.get( node ) ); + } + + private void setInternalStatesMatrix( final CharacterStateMatrix internal_states_matrix ) { + _internal_states_matrix = internal_states_matrix; + } + + /* (non-Javadoc) + * @see org.forester.phylogenyinference.Parsimony#setReturnGainLossMatrix(boolean) + */ + public void setReturnGainLossMatrix( final boolean return_gain_loss ) { + _return_gain_loss = return_gain_loss; + } + + /* (non-Javadoc) + * @see org.forester.phylogenyinference.Parsimony#setReturnInternalStates(boolean) + */ + public void setReturnInternalStates( final boolean return_internal_states ) { + _return_internal_states = return_internal_states; + } + + private void setTotalGains( final int total_gains ) { + _total_gains = total_gains; + } + + private void setTotalLosses( final int total_losses ) { + _total_losses = total_losses; + } + + private void setTotalUnchanged( final int total_unchanged ) { + _total_unchanged = total_unchanged; + } + + public static DolloParsimony createInstance() { + return new DolloParsimony(); + } + + private static int getNumberOfChildNodesWithPresentOrUnknownState( final Map states, + final PhylogenyNode node ) { + int presents = 0; + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + final PhylogenyNode node_child = node.getChildNode( i ); + if ( !states.containsKey( node_child ) ) { + throw new RuntimeException( "this should not have happened: node [" + node_child.getName() + + "] not found in node state map" ); + } + if ( ( states.get( node_child ) == BinaryStates.PRESENT ) + || ( states.get( node_child ) == BinaryStates.UNKNOWN ) ) { + ++presents; + } + } + return presents; + } + + private static boolean isCharacterPresentOrUnknownInAtLeastTwoChildNodes( final Map states, + final PhylogenyNode node ) { + int count = 0; + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + final PhylogenyNode node_child = node.getChildNode( i ); + if ( ( states.get( node_child ) == PRESENT ) || ( states.get( node_child ) == UNKNOWN ) ) { + ++count; + if ( count > 1 ) { + return true; + } + } + } + return false; + } +} diff --git a/forester/java/src/org/forester/evoinference/parsimony/FitchParsimony.java b/forester/java/src/org/forester/evoinference/parsimony/FitchParsimony.java new file mode 100644 index 0000000..8323167 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/parsimony/FitchParsimony.java @@ -0,0 +1,539 @@ +// $Id: +// +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.parsimony; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; +import org.forester.evoinference.matrix.character.CharacterStateMatrix; +import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; +import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.FailedConditionCheckException; +import org.forester.util.ForesterUtil; + +public class FitchParsimony { + + final static private BinaryStates PRESENT = BinaryStates.PRESENT; + final static private BinaryStates ABSENT = BinaryStates.ABSENT; + final static private GainLossStates LOSS = GainLossStates.LOSS; + final static private GainLossStates GAIN = GainLossStates.GAIN; + final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; + final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT; + private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false; + private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false; + private static final boolean RANDOMIZE_DEFAULT = false; + private static final long RANDOM_NUMBER_SEED_DEFAULT = 21; + private static final boolean USE_LAST_DEFAULT = false; + private boolean _return_internal_states = false; + private boolean _return_gain_loss = false; + private int _total_gains; + private int _total_losses; + private int _total_unchanged; + private CharacterStateMatrix> _internal_states_matrix_prior_to_traceback; + private CharacterStateMatrix _internal_states_matrix_after_traceback; + private CharacterStateMatrix _gain_loss_matrix; + private boolean _randomize; + private boolean _use_last; + private int _cost; + private long _random_number_seed; + private Random _random_generator; + + public FitchParsimony() { + init(); + } + + private int determineIndex( final SortedSet current_node_states, int i ) { + if ( isRandomize() ) { + i = getRandomGenerator().nextInt( current_node_states.size() ); + } + else if ( isUseLast() ) { + i = current_node_states.size() - 1; + } + return i; + } + + public void execute( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix ) { + if ( !p.isRooted() ) { + throw new IllegalArgumentException( "attempt to execute Fitch parsimony on unroored phylogeny" ); + } + if ( external_node_states_matrix.isEmpty() ) { + throw new IllegalArgumentException( "character matrix is empty" ); + } + if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) { + throw new IllegalArgumentException( "number of external nodes in phylogeny [" + + p.getNumberOfExternalNodes() + "] and number of indentifiers [" + + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" ); + } + reset(); + if ( isReturnInternalStates() ) { + initializeInternalStates( p, external_node_states_matrix ); + } + if ( isReturnGainLossMatrix() ) { + initializeGainLossMatrix( p, external_node_states_matrix ); + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + executeForOneCharacter( p, + getStatesForCharacter( p, external_node_states_matrix, character_index ), + getStatesForCharacterForTraceback( p, external_node_states_matrix, character_index ), + character_index ); + } + if ( external_node_states_matrix.getState( 0, 0 ) instanceof BinaryStates ) { + if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains() + + getTotalLosses() + getTotalUnchanged() ) ) { + throw new FailedConditionCheckException( "this should not have happened: something is deeply wrong with Fitch parsimony implementation" ); + } + } + } + + private void executeForOneCharacter( final Phylogeny p, + final Map> states, + final Map traceback_states, + final int character_state_column ) { + postOrderTraversal( p, states ); + preOrderTraversal( p, states, traceback_states, character_state_column ); + } + + public int getCost() { + return _cost; + } + + public CharacterStateMatrix getGainLossMatrix() { + if ( !isReturnGainLossMatrix() ) { + throw new RuntimeException( "creation of gain-loss matrix has not been enabled" ); + } + return _gain_loss_matrix; + } + + public CharacterStateMatrix getInternalStatesMatrix() { + if ( !isReturnInternalStates() ) { + throw new RuntimeException( "creation of internal state matrix has not been enabled" ); + } + return _internal_states_matrix_after_traceback; + } + + /** + * Returns a view of the internal states prior to trace-back. + * + * @return + */ + public CharacterStateMatrix> getInternalStatesMatrixPriorToTraceback() { + if ( !isReturnInternalStates() ) { + throw new RuntimeException( "creation of internal state matrix has not been enabled" ); + } + return _internal_states_matrix_prior_to_traceback; + } + + private SortedSet getIntersectionOfStatesOfChildNodes( final Map> states, + final PhylogenyNode node ) throws AssertionError { + final SortedSet states_in_child_nodes = new TreeSet(); + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + final PhylogenyNode node_child = node.getChildNode( i ); + if ( !states.containsKey( node_child ) ) { + throw new AssertionError( "this should not have happened: node [" + node_child.getName() + + "] not found in node state map" ); + } + if ( i == 0 ) { + states_in_child_nodes.addAll( states.get( node_child ) ); + } + else { + states_in_child_nodes.retainAll( states.get( node_child ) ); + } + } + return states_in_child_nodes; + } + + private Random getRandomGenerator() { + return _random_generator; + } + + private long getRandomNumberSeed() { + return _random_number_seed; + } + + private STATE_TYPE getStateAt( final int i, final SortedSet states ) { + final Iterator it = states.iterator(); + for( int j = 0; j < i; ++j ) { + it.next(); + } + return it.next(); + } + + private Map> getStatesForCharacter( final Phylogeny p, + final CharacterStateMatrix matrix, + final int character_index ) { + final Map> states = new HashMap>( matrix + .getNumberOfIdentifiers() ); + for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) { + final STATE_TYPE state = matrix.getState( indentifier_index, character_index ); + if ( state == null ) { + throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index + + "] is null" ); + } + final SortedSet l = new TreeSet(); + l.add( state ); + states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), l ); + } + return states; + } + + private Map getStatesForCharacterForTraceback( final Phylogeny p, + final CharacterStateMatrix matrix, + final int character_index ) { + final Map states = new HashMap( matrix + .getNumberOfIdentifiers() ); + for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) { + final STATE_TYPE state = matrix.getState( indentifier_index, character_index ); + if ( state == null ) { + throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index + + "] is null" ); + } + states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state ); + } + return states; + } + + public int getTotalGains() { + return _total_gains; + } + + public int getTotalLosses() { + return _total_losses; + } + + public int getTotalUnchanged() { + return _total_unchanged; + } + + private SortedSet getUnionOfStatesOfChildNodes( final Map> states, + final PhylogenyNode node ) throws AssertionError { + final SortedSet states_in_child_nodes = new TreeSet(); + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + final PhylogenyNode node_child = node.getChildNode( i ); + if ( !states.containsKey( node_child ) ) { + throw new AssertionError( "this should not have happened: node [" + node_child.getName() + + "] not found in node state map" ); + } + states_in_child_nodes.addAll( states.get( node_child ) ); + } + return states_in_child_nodes; + } + + private void increaseCost() { + ++_cost; + } + + private void init() { + setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT ); + setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT ); + setRandomize( RANDOMIZE_DEFAULT ); + setUseLast( USE_LAST_DEFAULT ); + _random_number_seed = RANDOM_NUMBER_SEED_DEFAULT; + reset(); + } + + private void initializeGainLossMatrix( final Phylogeny p, + final CharacterStateMatrix external_node_states_matrix ) { + final List nodes = new ArrayList(); + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + nodes.add( postorder.next() ); + } + setGainLossMatrix( new BasicCharacterStateMatrix( nodes.size(), + external_node_states_matrix + .getNumberOfCharacters() ) ); + int identifier_index = 0; + for( final PhylogenyNode node : nodes ) { + getGainLossMatrix().setIdentifier( identifier_index++, + ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node + .getName() ); + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + getGainLossMatrix().setCharacter( character_index, + external_node_states_matrix.getCharacter( character_index ) ); + } + } + + private void initializeInternalStates( final Phylogeny p, + final CharacterStateMatrix external_node_states_matrix ) { + final List internal_nodes = new ArrayList(); + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + final PhylogenyNode node = postorder.next(); + if ( node.isInternal() ) { + internal_nodes.add( node ); + } + } + setInternalStatesMatrixPriorToTraceback( new BasicCharacterStateMatrix>( internal_nodes.size(), + external_node_states_matrix + .getNumberOfCharacters() ) ); + setInternalStatesMatrixTraceback( new BasicCharacterStateMatrix( internal_nodes.size(), + external_node_states_matrix + .getNumberOfCharacters() ) ); + int identifier_index = 0; + for( final PhylogenyNode node : internal_nodes ) { + getInternalStatesMatrix().setIdentifier( identifier_index, + ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node + .getName() ); + getInternalStatesMatrixPriorToTraceback().setIdentifier( identifier_index, + ForesterUtil.isEmpty( node.getName() ) ? node + .getId() + + "" : node.getName() ); + ++identifier_index; + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + getInternalStatesMatrix().setCharacter( character_index, + external_node_states_matrix.getCharacter( character_index ) ); + getInternalStatesMatrixPriorToTraceback().setCharacter( character_index, + external_node_states_matrix + .getCharacter( character_index ) ); + } + } + + private boolean isRandomize() { + return _randomize; + } + + private boolean isReturnGainLossMatrix() { + return _return_gain_loss; + } + + private boolean isReturnInternalStates() { + return _return_internal_states; + } + + private boolean isUseLast() { + return _use_last; + } + + private void postOrderTraversal( final Phylogeny p, final Map> states ) + throws AssertionError { + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + final PhylogenyNode node = postorder.next(); + if ( !node.isExternal() ) { + SortedSet states_in_children = getIntersectionOfStatesOfChildNodes( states, node ); + if ( states_in_children.isEmpty() ) { + states_in_children = getUnionOfStatesOfChildNodes( states, node ); + } + states.put( node, states_in_children ); + } + } + } + + private void preOrderTraversal( final Phylogeny p, + final Map> states, + final Map traceback_states, + final int character_state_column ) throws AssertionError { + for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) { + final PhylogenyNode current_node = preorder.next(); + final SortedSet current_node_states = states.get( current_node ); + STATE_TYPE parent_state = null; + if ( current_node.isRoot() ) { + int i = 0; + i = determineIndex( current_node_states, i ); + traceback_states.put( current_node, getStateAt( i, current_node_states ) ); + } + else { + parent_state = traceback_states.get( current_node.getParent() ); + if ( current_node_states.contains( parent_state ) ) { + traceback_states.put( current_node, parent_state ); + } + else { + increaseCost(); + int i = 0; + i = determineIndex( current_node_states, i ); + traceback_states.put( current_node, getStateAt( i, current_node_states ) ); + } + } + if ( isReturnInternalStates() ) { + if ( !current_node.isExternal() ) { + setInternalNodeStatePriorToTraceback( states, character_state_column, current_node ); + setInternalNodeState( traceback_states, character_state_column, current_node ); + } + } + if ( isReturnGainLossMatrix() && !current_node.isRoot() ) { + if ( !( parent_state instanceof BinaryStates ) ) { + throw new RuntimeException( "attempt to create gain loss matrix for not binary states" ); + } + final BinaryStates parent_binary_state = ( BinaryStates ) parent_state; + final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node ); + if ( ( parent_binary_state == PRESENT ) && ( current_binary_state == ABSENT ) ) { + ++_total_losses; + setGainLossState( character_state_column, current_node, LOSS ); + } + else if ( ( ( parent_binary_state == ABSENT ) || ( parent_binary_state == null ) ) + && ( current_binary_state == PRESENT ) ) { + ++_total_gains; + setGainLossState( character_state_column, current_node, GAIN ); + } + else { + ++_total_unchanged; + if ( current_binary_state == PRESENT ) { + setGainLossState( character_state_column, current_node, UNCHANGED_PRESENT ); + } + else if ( current_binary_state == ABSENT ) { + setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT ); + } + } + } + else if ( isReturnGainLossMatrix() && current_node.isRoot() ) { + final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node ); + ++_total_unchanged; //new + if ( current_binary_state == PRESENT ) {//new + setGainLossState( character_state_column, current_node, UNCHANGED_PRESENT );//new + }//new + else if ( current_binary_state == ABSENT ) {//new + setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT );//new + }//new + // setGainLossState( character_state_column, current_node, UNKNOWN_GAIN_LOSS ); + } + } + } + + private void reset() { + setCost( 0 ); + setTotalLosses( 0 ); + setTotalGains( 0 ); + setTotalUnchanged( 0 ); + setRandomGenerator( new Random( getRandomNumberSeed() ) ); + } + + private void setCost( final int cost ) { + _cost = cost; + } + + private void setGainLossMatrix( final CharacterStateMatrix gain_loss_matrix ) { + _gain_loss_matrix = gain_loss_matrix; + } + + private void setGainLossState( final int character_state_column, + final PhylogenyNode node, + final GainLossStates state ) { + getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), + character_state_column, + state ); + } + + private void setInternalNodeState( final Map states, + final int character_state_column, + final PhylogenyNode node ) { + getInternalStatesMatrix() + .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), + character_state_column, + states.get( node ) ); + } + + private void setInternalNodeStatePriorToTraceback( final Map> states, + final int character_state_column, + final PhylogenyNode node ) { + getInternalStatesMatrixPriorToTraceback().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" + : node.getName(), + character_state_column, + toListSorted( states.get( node ) ) ); + } + + private void setInternalStatesMatrixPriorToTraceback( final CharacterStateMatrix> internal_states_matrix_prior_to_traceback ) { + _internal_states_matrix_prior_to_traceback = internal_states_matrix_prior_to_traceback; + } + + private void setInternalStatesMatrixTraceback( final CharacterStateMatrix internal_states_matrix_after_traceback ) { + _internal_states_matrix_after_traceback = internal_states_matrix_after_traceback; + } + + private void setRandomGenerator( final Random random_generator ) { + _random_generator = random_generator; + } + + public void setRandomize( final boolean randomize ) { + if ( randomize && isUseLast() ) { + throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); + } + _randomize = randomize; + } + + public void setRandomNumberSeed( final long random_number_seed ) { + if ( !isRandomize() ) { + throw new IllegalArgumentException( "attempt to set random number generator seed without randomization enabled" ); + } + _random_number_seed = random_number_seed; + } + + public void setReturnGainLossMatrix( final boolean return_gain_loss ) { + _return_gain_loss = return_gain_loss; + } + + public void setReturnInternalStates( final boolean return_internal_states ) { + _return_internal_states = return_internal_states; + } + + private void setTotalGains( final int total_gains ) { + _total_gains = total_gains; + } + + private void setTotalLosses( final int total_losses ) { + _total_losses = total_losses; + } + + private void setTotalUnchanged( final int total_unchanged ) { + _total_unchanged = total_unchanged; + } + + /** + * This sets whether to use the first or last state in the sorted + * states at the undecided internal nodes. + * For randomized choices set randomize to true (and this to false). + * + * Note. It might be advisable to set this to false + * for BinaryStates if absence at the root is preferred + * (given the enum BinaryStates sorts in the following order: + * ABSENT, UNKNOWN, PRESENT). + * + * + * @param use_last + */ + public void setUseLast( final boolean use_last ) { + if ( use_last && isRandomize() ) { + throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); + } + _use_last = use_last; + } + + private List toListSorted( final SortedSet states ) { + final List l = new ArrayList( states.size() ); + for( final STATE_TYPE state : states ) { + l.add( state ); + } + return l; + } +} diff --git a/forester/java/src/org/forester/evoinference/parsimony/SankoffParsimony.java b/forester/java/src/org/forester/evoinference/parsimony/SankoffParsimony.java new file mode 100644 index 0000000..195a7e0 --- /dev/null +++ b/forester/java/src/org/forester/evoinference/parsimony/SankoffParsimony.java @@ -0,0 +1,546 @@ +// $Id: +// +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org + +package org.forester.evoinference.parsimony; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.SortedSet; +import java.util.TreeSet; + +import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix; +import org.forester.evoinference.matrix.character.CharacterStateMatrix; +import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates; +import org.forester.evoinference.matrix.character.CharacterStateMatrix.GainLossStates; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.ForesterUtil; + +/** + * + * IN PROGRESS! + * DO NOT USE! + * + * + * @param + */ +public class SankoffParsimony { + + final static private BinaryStates PRESENT = BinaryStates.PRESENT; + final static private BinaryStates ABSENT = BinaryStates.ABSENT; + final static private GainLossStates LOSS = GainLossStates.LOSS; + final static private GainLossStates GAIN = GainLossStates.GAIN; + final static private GainLossStates UNCHANGED_PRESENT = GainLossStates.UNCHANGED_PRESENT; + final static private GainLossStates UNCHANGED_ABSENT = GainLossStates.UNCHANGED_ABSENT; + private static final boolean RETURN_INTERNAL_STATES_DEFAULT = false; + private static final boolean RETURN_GAIN_LOSS_MATRIX_DEFAULT = false; + private static final boolean RANDOMIZE_DEFAULT = false; + private static final long RANDOM_NUMBER_SEED_DEFAULT = 21; + private static final boolean USE_LAST_DEFAULT = false; + private boolean _return_internal_states = false; + private boolean _return_gain_loss = false; + private int _total_gains; + private int _total_losses; + private int _total_unchanged; + private CharacterStateMatrix> _internal_states_matrix_prior_to_traceback; + private CharacterStateMatrix _internal_states_matrix_after_traceback; + private CharacterStateMatrix _gain_loss_matrix; + private boolean _randomize; + private boolean _use_last; + private int _cost; + private long _random_number_seed; + private Random _random_generator; + + public SankoffParsimony() { + init(); + } + + private int determineIndex( final SortedSet current_node_states, int i ) { + if ( isRandomize() ) { + i = getRandomGenerator().nextInt( current_node_states.size() ); + } + else if ( isUseLast() ) { + i = current_node_states.size() - 1; + } + return i; + } + + public void execute( final Phylogeny p, final CharacterStateMatrix external_node_states_matrix ) { + if ( !p.isRooted() ) { + throw new IllegalArgumentException( "attempt to execute Fitch parsimony on unroored phylogeny" ); + } + if ( external_node_states_matrix.isEmpty() ) { + throw new IllegalArgumentException( "character matrix is empty" ); + } + if ( external_node_states_matrix.getNumberOfIdentifiers() != p.getNumberOfExternalNodes() ) { + throw new IllegalArgumentException( "number of external nodes in phylogeny [" + + p.getNumberOfExternalNodes() + "] and number of indentifiers [" + + external_node_states_matrix.getNumberOfIdentifiers() + "] in matrix are not equal" ); + } + reset(); + if ( isReturnInternalStates() ) { + initializeInternalStates( p, external_node_states_matrix ); + } + if ( isReturnGainLossMatrix() ) { + initializeGainLossMatrix( p, external_node_states_matrix ); + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + executeForOneCharacter( p, + getStatesForCharacter( p, external_node_states_matrix, character_index ), + getStatesForCharacterForTraceback( p, external_node_states_matrix, character_index ), + character_index ); + } + if ( external_node_states_matrix.getState( 0, 0 ) instanceof BinaryStates ) { + if ( ( external_node_states_matrix.getNumberOfCharacters() * p.getNumberOfBranches() ) != ( getTotalGains() + + getTotalLosses() + getTotalUnchanged() ) ) { + throw new RuntimeException( "this should not have happened: something is deeply wrong with Fitch parsimony implementation" ); + } + } + } + + private void executeForOneCharacter( final Phylogeny p, + final Map> states, + final Map traceback_states, + final int character_state_column ) { + postOrderTraversal( p, states ); + preOrderTraversal( p, states, traceback_states, character_state_column ); + } + + public int getCost() { + return _cost; + } + + public CharacterStateMatrix getGainLossMatrix() { + if ( !isReturnGainLossMatrix() ) { + throw new RuntimeException( "creation of gain-loss matrix has not been enabled" ); + } + return _gain_loss_matrix; + } + + public CharacterStateMatrix getInternalStatesMatrix() { + if ( !isReturnInternalStates() ) { + throw new RuntimeException( "creation of internal state matrix has not been enabled" ); + } + return _internal_states_matrix_after_traceback; + } + + /** + * Returns a view of the internal states prior to trace-back. + * + * @return + */ + public CharacterStateMatrix> getInternalStatesMatrixPriorToTraceback() { + if ( !isReturnInternalStates() ) { + throw new RuntimeException( "creation of internal state matrix has not been enabled" ); + } + return _internal_states_matrix_prior_to_traceback; + } + + private SortedSet getIntersectionOfStatesOfChildNodes( final Map> states, + final PhylogenyNode node ) throws AssertionError { + final SortedSet states_in_child_nodes = new TreeSet(); + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + final PhylogenyNode node_child = node.getChildNode( i ); + if ( !states.containsKey( node_child ) ) { + throw new AssertionError( "this should not have happened: node [" + node_child.getName() + + "] not found in node state map" ); + } + if ( i == 0 ) { + states_in_child_nodes.addAll( states.get( node_child ) ); + } + else { + states_in_child_nodes.retainAll( states.get( node_child ) ); + } + } + return states_in_child_nodes; + } + + private Random getRandomGenerator() { + return _random_generator; + } + + private long getRandomNumberSeed() { + return _random_number_seed; + } + + private STATE_TYPE getStateAt( final int i, final SortedSet states ) { + final Iterator it = states.iterator(); + for( int j = 0; j < i; ++j ) { + it.next(); + } + return it.next(); + } + + private Map> getStatesForCharacter( final Phylogeny p, + final CharacterStateMatrix matrix, + final int character_index ) { + final Map> states = new HashMap>( matrix + .getNumberOfIdentifiers() ); + for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) { + final STATE_TYPE state = matrix.getState( indentifier_index, character_index ); + if ( state == null ) { + throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index + + "] is null" ); + } + final SortedSet l = new TreeSet(); + l.add( state ); + states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), l ); + } + return states; + } + + private Map getStatesForCharacterForTraceback( final Phylogeny p, + final CharacterStateMatrix matrix, + final int character_index ) { + final Map states = new HashMap( matrix + .getNumberOfIdentifiers() ); + for( int indentifier_index = 0; indentifier_index < matrix.getNumberOfIdentifiers(); ++indentifier_index ) { + final STATE_TYPE state = matrix.getState( indentifier_index, character_index ); + if ( state == null ) { + throw new IllegalArgumentException( "value at [" + indentifier_index + ", " + character_index + + "] is null" ); + } + states.put( p.getNode( matrix.getIdentifier( indentifier_index ) ), state ); + } + return states; + } + + public int getTotalGains() { + return _total_gains; + } + + public int getTotalLosses() { + return _total_losses; + } + + public int getTotalUnchanged() { + return _total_unchanged; + } + + private SortedSet getUnionOfStatesOfChildNodes( final Map> states, + final PhylogenyNode node ) throws AssertionError { + final SortedSet states_in_child_nodes = new TreeSet(); + for( int i = 0; i < node.getNumberOfDescendants(); ++i ) { + final PhylogenyNode node_child = node.getChildNode( i ); + if ( !states.containsKey( node_child ) ) { + throw new AssertionError( "this should not have happened: node [" + node_child.getName() + + "] not found in node state map" ); + } + states_in_child_nodes.addAll( states.get( node_child ) ); + } + return states_in_child_nodes; + } + + private void increaseCost() { + ++_cost; + } + + private void init() { + setReturnInternalStates( RETURN_INTERNAL_STATES_DEFAULT ); + setReturnGainLossMatrix( RETURN_GAIN_LOSS_MATRIX_DEFAULT ); + setRandomize( RANDOMIZE_DEFAULT ); + setUseLast( USE_LAST_DEFAULT ); + _random_number_seed = RANDOM_NUMBER_SEED_DEFAULT; + reset(); + } + + private void initializeGainLossMatrix( final Phylogeny p, + final CharacterStateMatrix external_node_states_matrix ) { + final List nodes = new ArrayList(); + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + nodes.add( postorder.next() ); + } + setGainLossMatrix( new BasicCharacterStateMatrix( nodes.size(), + external_node_states_matrix + .getNumberOfCharacters() ) ); + int identifier_index = 0; + for( final PhylogenyNode node : nodes ) { + getGainLossMatrix().setIdentifier( identifier_index++, + ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node + .getName() ); + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + getGainLossMatrix().setCharacter( character_index, + external_node_states_matrix.getCharacter( character_index ) ); + } + } + + private void initializeInternalStates( final Phylogeny p, + final CharacterStateMatrix external_node_states_matrix ) { + final List internal_nodes = new ArrayList(); + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + final PhylogenyNode node = postorder.next(); + if ( node.isInternal() ) { + internal_nodes.add( node ); + } + } + setInternalStatesMatrixPriorToTraceback( new BasicCharacterStateMatrix>( internal_nodes.size(), + external_node_states_matrix + .getNumberOfCharacters() ) ); + setInternalStatesMatrixTraceback( new BasicCharacterStateMatrix( internal_nodes.size(), + external_node_states_matrix + .getNumberOfCharacters() ) ); + int identifier_index = 0; + for( final PhylogenyNode node : internal_nodes ) { + getInternalStatesMatrix().setIdentifier( identifier_index, + ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node + .getName() ); + getInternalStatesMatrixPriorToTraceback().setIdentifier( identifier_index, + ForesterUtil.isEmpty( node.getName() ) ? node + .getId() + + "" : node.getName() ); + ++identifier_index; + } + for( int character_index = 0; character_index < external_node_states_matrix.getNumberOfCharacters(); ++character_index ) { + getInternalStatesMatrix().setCharacter( character_index, + external_node_states_matrix.getCharacter( character_index ) ); + getInternalStatesMatrixPriorToTraceback().setCharacter( character_index, + external_node_states_matrix + .getCharacter( character_index ) ); + } + } + + private boolean isRandomize() { + return _randomize; + } + + private boolean isReturnGainLossMatrix() { + return _return_gain_loss; + } + + private boolean isReturnInternalStates() { + return _return_internal_states; + } + + private boolean isUseLast() { + return _use_last; + } + + private void postOrderTraversal( final Phylogeny p, final Map> states ) + throws AssertionError { + for( final PhylogenyNodeIterator postorder = p.iteratorPostorder(); postorder.hasNext(); ) { + final PhylogenyNode node = postorder.next(); + if ( !node.isExternal() ) { + SortedSet states_in_children = getIntersectionOfStatesOfChildNodes( states, node ); + if ( states_in_children.isEmpty() ) { + states_in_children = getUnionOfStatesOfChildNodes( states, node ); + } + states.put( node, states_in_children ); + } + } + } + + private void preOrderTraversal( final Phylogeny p, + final Map> states, + final Map traceback_states, + final int character_state_column ) throws AssertionError { + for( final PhylogenyNodeIterator preorder = p.iteratorPreorder(); preorder.hasNext(); ) { + final PhylogenyNode current_node = preorder.next(); + final SortedSet current_node_states = states.get( current_node ); + STATE_TYPE parent_state = null; + if ( current_node.isRoot() ) { + int i = 0; + i = determineIndex( current_node_states, i ); + traceback_states.put( current_node, getStateAt( i, current_node_states ) ); + } + else { + parent_state = traceback_states.get( current_node.getParent() ); + if ( current_node_states.contains( parent_state ) ) { + traceback_states.put( current_node, parent_state ); + } + else { + increaseCost(); + int i = 0; + i = determineIndex( current_node_states, i ); + traceback_states.put( current_node, getStateAt( i, current_node_states ) ); + } + } + if ( isReturnInternalStates() ) { + if ( !current_node.isExternal() ) { + setInternalNodeStatePriorToTraceback( states, character_state_column, current_node ); + setInternalNodeState( traceback_states, character_state_column, current_node ); + } + } + if ( isReturnGainLossMatrix() && !current_node.isRoot() ) { + if ( !( parent_state instanceof BinaryStates ) ) { + throw new RuntimeException( "attempt to create gain loss matrix for not binary states" ); + } + final BinaryStates parent_binary_state = ( BinaryStates ) parent_state; + final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node ); + if ( ( parent_binary_state == PRESENT ) && ( current_binary_state == ABSENT ) ) { + ++_total_losses; + setGainLossState( character_state_column, current_node, LOSS ); + } + else if ( ( ( parent_binary_state == ABSENT ) || ( parent_binary_state == null ) ) + && ( current_binary_state == PRESENT ) ) { + ++_total_gains; + setGainLossState( character_state_column, current_node, GAIN ); + } + else { + ++_total_unchanged; + if ( current_binary_state == PRESENT ) { + setGainLossState( character_state_column, current_node, UNCHANGED_PRESENT ); + } + else if ( current_binary_state == ABSENT ) { + setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT ); + } + } + } + else if ( isReturnGainLossMatrix() && current_node.isRoot() ) { + final BinaryStates current_binary_state = ( BinaryStates ) traceback_states.get( current_node ); + ++_total_unchanged; //new + if ( current_binary_state == PRESENT ) {//new + setGainLossState( character_state_column, current_node, UNCHANGED_PRESENT );//new + }//new + else if ( current_binary_state == ABSENT ) {//new + setGainLossState( character_state_column, current_node, UNCHANGED_ABSENT );//new + }//new + // setGainLossState( character_state_column, current_node, UNKNOWN_GAIN_LOSS ); + } + } + } + + private void reset() { + setCost( 0 ); + setTotalLosses( 0 ); + setTotalGains( 0 ); + setTotalUnchanged( 0 ); + setRandomGenerator( new Random( getRandomNumberSeed() ) ); + } + + private void setCost( final int cost ) { + _cost = cost; + } + + private void setGainLossMatrix( final CharacterStateMatrix gain_loss_matrix ) { + _gain_loss_matrix = gain_loss_matrix; + } + + private void setGainLossState( final int character_state_column, + final PhylogenyNode node, + final GainLossStates state ) { + getGainLossMatrix().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), + character_state_column, + state ); + } + + private void setInternalNodeState( final Map states, + final int character_state_column, + final PhylogenyNode node ) { + getInternalStatesMatrix() + .setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" : node.getName(), + character_state_column, + states.get( node ) ); + } + + private void setInternalNodeStatePriorToTraceback( final Map> states, + final int character_state_column, + final PhylogenyNode node ) { + getInternalStatesMatrixPriorToTraceback().setState( ForesterUtil.isEmpty( node.getName() ) ? node.getId() + "" + : node.getName(), + character_state_column, + toListSorted( states.get( node ) ) ); + } + + private void setInternalStatesMatrixPriorToTraceback( final CharacterStateMatrix> internal_states_matrix_prior_to_traceback ) { + _internal_states_matrix_prior_to_traceback = internal_states_matrix_prior_to_traceback; + } + + private void setInternalStatesMatrixTraceback( final CharacterStateMatrix internal_states_matrix_after_traceback ) { + _internal_states_matrix_after_traceback = internal_states_matrix_after_traceback; + } + + private void setRandomGenerator( final Random random_generator ) { + _random_generator = random_generator; + } + + public void setRandomize( final boolean randomize ) { + if ( randomize && isUseLast() ) { + throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); + } + _randomize = randomize; + } + + public void setRandomNumberSeed( final long random_number_seed ) { + if ( !isRandomize() ) { + throw new IllegalArgumentException( "attempt to set random number generator seed without randomization enabled" ); + } + _random_number_seed = random_number_seed; + } + + public void setReturnGainLossMatrix( final boolean return_gain_loss ) { + _return_gain_loss = return_gain_loss; + } + + public void setReturnInternalStates( final boolean return_internal_states ) { + _return_internal_states = return_internal_states; + } + + private void setTotalGains( final int total_gains ) { + _total_gains = total_gains; + } + + private void setTotalLosses( final int total_losses ) { + _total_losses = total_losses; + } + + private void setTotalUnchanged( final int total_unchanged ) { + _total_unchanged = total_unchanged; + } + + /** + * This sets whether to use the first or last state in the sorted + * states at the undecided internal nodes. + * For randomized choices set randomize to true (and this to false). + * + * Note. It might be advisable to set this to false + * for BinaryStates if absence at the root is preferred + * (given the enum BinaryStates sorts in the following order: + * ABSENT, UNKNOWN, PRESENT). + * + * + * @param use_last + */ + public void setUseLast( final boolean use_last ) { + if ( use_last && isRandomize() ) { + throw new IllegalArgumentException( "attempt to allways use last state (ordered) if more than one choices and randomization at the same time" ); + } + _use_last = use_last; + } + + private List toListSorted( final SortedSet states ) { + final List l = new ArrayList( states.size() ); + for( final STATE_TYPE state : states ) { + l.add( state ); + } + return l; + } +} diff --git a/forester/java/src/org/forester/evoinference/tools/BootstrapResampler.java b/forester/java/src/org/forester/evoinference/tools/BootstrapResampler.java new file mode 100644 index 0000000..123f77b --- /dev/null +++ b/forester/java/src/org/forester/evoinference/tools/BootstrapResampler.java @@ -0,0 +1,91 @@ +// $Id: +// forester -- software libraries and applications +// for genomics and evolutionary biology research. +// +// Copyright (C) 2010 Christian M Zmasek +// Copyright (C) 2010 Sanford-Burnham Medical Research Institute +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: www.phylosoft.org/forester + +package org.forester.evoinference.tools; + +import java.util.Random; + +import org.forester.msa.BasicMsa; +import org.forester.msa.Msa; + +public class BootstrapResampler { + + private static void copyIdentifiers( final Msa msa, final Msa new_msa ) { + for( int i = 0; i < msa.getNumberOfSequences(); ++i ) { + new_msa.setIdentifier( i, msa.getIdentifier( i ) ); + } + } + + private static void preconditionCheck( final Msa msa, final int n ) { + if ( msa.getLength() < 2 ) { + throw new IllegalArgumentException( "Msa length cannot be smaller than two for bootstrap resampling" ); + } + if ( msa.getNumberOfSequences() < 1 ) { + throw new IllegalArgumentException( "Attempt to bootstrap resample empty multiple sequence alignment" ); + } + if ( n < 1 ) { + throw new IllegalArgumentException( "Number of bootstrap resamples cannot be zero or negative" ); + } + } + + private static void preconditionCheck( final int length, final int n ) { + if ( length < 2 ) { + throw new IllegalArgumentException( "Msa length cannot be smaller than two for bootstrap resampling" ); + } + if ( n < 1 ) { + throw new IllegalArgumentException( "Number of bootstrap resamples cannot be zero or negative" ); + } + } + + public static Msa[] resample( final Msa msa, final int n, final long seed ) { + preconditionCheck( msa, n ); + final Random random = new Random( seed ); + final Msa[] msas = new Msa[ n ]; + for( int i = 0; i < n; ++i ) { + final Msa new_msa = new BasicMsa( msa.getNumberOfSequences(), msa.getLength(), msa.getType() ); + msas[ i ] = new_msa; + copyIdentifiers( msa, new_msa ); + for( int col = 0; col < msa.getLength(); ++col ) { + final int random_col = random.nextInt( msa.getLength() ); + for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { + new_msa.setResidueAt( row, col, msa.getResidueAt( row, random_col ) ); + } + } + } + return msas; + } + + public static int[][] createResampledColumnPositions( final int length, final int n, final long seed ) { + preconditionCheck( length, n ); + final Random random = new Random( seed ); + final int[][] columns = new int[ n ][ length ]; + for( int i = 0; i < n; ++i ) { + for( int col = 0; col < length; ++col ) { + columns[ i ][ col ] = random.nextInt( length ); + } + } + return columns; + } +} -- 1.7.10.2